From c128450a8b041223c142854d60a73b23aa37e120 Mon Sep 17 00:00:00 2001
From: Scott Todd <scott.todd0@gmail.com>
Date: Thu, 9 May 2024 15:08:33 -0700
Subject: [PATCH] Deployed d92a43dbf8 with MkDocs version: 1.6.0

---
 .../index.html                                |   6 +-
 .../index.html                                |  10 +-
 developers/general/contributing/index.html    | 443 +++++++++---------
 search/search_index.json                      |   2 +-
 sitemap.xml                                   | 170 +++----
 sitemap.xml.gz                                | Bin 1018 -> 1018 bytes
 6 files changed, 325 insertions(+), 306 deletions(-)
diff --git a/community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/index.html b/community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/index.html
index c885e32f37fd..b3ec6d3d39e5 100755
--- a/community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/index.html
+++ b/community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/index.html
@@ -3855,7 +3855,11 @@ <h2 id="overview-of-the-compilation-and-linking-flow">Overview of the compilatio
   asm
   object
   vmfb
-end</code></pre>
+end
+
+style Part1 stroke:#FDD835,stroke-width:2px
+style Part2 stroke:#039BE5,stroke-width:2px
+style Part3 stroke:#43A047,stroke-width:2px</code></pre>
 <h2 id="part-1-mlir-code-generation">🟨 Part 1: MLIR code generation<a class="headerlink" href="#part-1-mlir-code-generation" title="Permanent link">link</a></h2>
 <p>Some initial boilerplate happens <em>around</em> our <code>linalg.matmul</code> before anything
 interesting happens <em>to</em> it.:</p>
diff --git a/community/blog/2024-01-29-iree-mlir-linalg-tutorial/index.html b/community/blog/2024-01-29-iree-mlir-linalg-tutorial/index.html
index 9d11cdf9ac1e..3b2139312dad 100755
--- a/community/blog/2024-01-29-iree-mlir-linalg-tutorial/index.html
+++ b/community/blog/2024-01-29-iree-mlir-linalg-tutorial/index.html
@@ -3864,13 +3864,15 @@ <h3 id="static-shape-element-wise-addition-of-two-1d-arrays">Static-shape, eleme
   <code>--iree-hal-target-backends=</code>. You will then need to pass a matching
   <code>--device=</code> to <code>iree-run-module</code> below.</li>
 <li>To cross-compile, explore <code>--iree-llvmcpu-target-triple=</code>.</li>
-<li>To enable higher CPU performance by enabling CPU features:</li>
+<li>To enable higher CPU performance by enabling CPU features:<ul>
 <li>On x86, explore <code>--iree-llvmcpu-target-cpu=</code> (e.g.
-    <code>--iree-llvmcpu-target-cpu=znver4</code> to target AMD Zen4).</li>
+  <code>--iree-llvmcpu-target-cpu=znver4</code> to target AMD Zen4).</li>
 <li>On other architectures, explore <code>--iree-llvmcpu-target-cpu-features=</code>.</li>
 <li>To optimize for running on the same machine that the compilation ran
-    on, pass  <code>--iree-llvmcpu-target-cpu=host</code>. That works regardless of
-    CPU architecture.</li>
+  on, pass  <code>--iree-llvmcpu-target-cpu=host</code>. That works regardless of
+  CPU architecture.</li>
+</ul>
+</li>
 <li>Check out
   <a href="../../../developers/general/developer-tips/">these docs</a> for
   more useful <code>iree-compile</code> flags.</li>
diff --git a/developers/general/contributing/index.html b/developers/general/contributing/index.html
index 0f7e43c265ac..36a7eab651d8 100755
--- a/developers/general/contributing/index.html
+++ b/developers/general/contributing/index.html
@@ -2134,6 +2134,15 @@
     </span>
   </a>
   
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#developer-certificate-of-origin" class="md-nav__link">
+    <span class="md-ellipsis">
+      Developer Certificate of Origin
+    </span>
+  </a>
+  
 </li>
         
           <li class="md-nav__item">
@@ -2146,18 +2155,33 @@
 </li>
         
           <li class="md-nav__item">
-  <a href="#coding-style-guidelines" class="md-nav__link">
+  <a href="#authors-codeowners-and-maintainers" class="md-nav__link">
     <span class="md-ellipsis">
-      Coding style guidelines
+      AUTHORS, CODEOWNERS, and MAINTAINERS
     </span>
   </a>
   
 </li>
         
+      </ul>
+    </nav>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#coding-policies" class="md-nav__link">
+    <span class="md-ellipsis">
+      Coding policies
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Coding policies">
+      <ul class="md-nav__list">
+        
           <li class="md-nav__item">
-  <a href="#code-reviews" class="md-nav__link">
+  <a href="#coding-style-guidelines" class="md-nav__link">
     <span class="md-ellipsis">
-      Code reviews
+      Coding style guidelines
     </span>
   </a>
   
@@ -2170,6 +2194,30 @@
     </span>
   </a>
   
+</li>
+        
+      </ul>
+    </nav>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#github-policies" class="md-nav__link">
+    <span class="md-ellipsis">
+      GitHub policies
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="GitHub policies">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#code-reviews" class="md-nav__link">
+    <span class="md-ellipsis">
+      Code reviews
+    </span>
+  </a>
+  
 </li>
         
           <li class="md-nav__item">
@@ -2200,9 +2248,9 @@
 </li>
         
           <li class="md-nav__item">
-  <a href="#credits-in-the-authors-file" class="md-nav__link">
+  <a href="#branch-naming" class="md-nav__link">
     <span class="md-ellipsis">
-      Credits in the AUTHORS file
+      Branch naming
     </span>
   </a>
   
@@ -2258,15 +2306,6 @@
     </span>
   </a>
   
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#custom-managed-runners" class="md-nav__link">
-    <span class="md-ellipsis">
-      Custom managed runners
-    </span>
-  </a>
-  
 </li>
         
           <li class="md-nav__item">
@@ -2281,39 +2320,6 @@
       </ul>
     </nav>
   
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#git-workflows" class="md-nav__link">
-    <span class="md-ellipsis">
-      Git workflows
-    </span>
-  </a>
-  
-    <nav class="md-nav" aria-label="Git workflows">
-      <ul class="md-nav__list">
-        
-          <li class="md-nav__item">
-  <a href="#setup" class="md-nav__link">
-    <span class="md-ellipsis">
-      Setup
-    </span>
-  </a>
-  
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#git-config" class="md-nav__link">
-    <span class="md-ellipsis">
-      Git config
-    </span>
-  </a>
-  
-</li>
-        
-      </ul>
-    </nav>
-  
 </li>
         
       </ul>
@@ -3736,6 +3742,15 @@
     </span>
   </a>
   
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#developer-certificate-of-origin" class="md-nav__link">
+    <span class="md-ellipsis">
+      Developer Certificate of Origin
+    </span>
+  </a>
+  
 </li>
         
           <li class="md-nav__item">
@@ -3748,18 +3763,33 @@
 </li>
         
           <li class="md-nav__item">
-  <a href="#coding-style-guidelines" class="md-nav__link">
+  <a href="#authors-codeowners-and-maintainers" class="md-nav__link">
     <span class="md-ellipsis">
-      Coding style guidelines
+      AUTHORS, CODEOWNERS, and MAINTAINERS
     </span>
   </a>
   
 </li>
         
+      </ul>
+    </nav>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#coding-policies" class="md-nav__link">
+    <span class="md-ellipsis">
+      Coding policies
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Coding policies">
+      <ul class="md-nav__list">
+        
           <li class="md-nav__item">
-  <a href="#code-reviews" class="md-nav__link">
+  <a href="#coding-style-guidelines" class="md-nav__link">
     <span class="md-ellipsis">
-      Code reviews
+      Coding style guidelines
     </span>
   </a>
   
@@ -3772,6 +3802,30 @@
     </span>
   </a>
   
+</li>
+        
+      </ul>
+    </nav>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#github-policies" class="md-nav__link">
+    <span class="md-ellipsis">
+      GitHub policies
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="GitHub policies">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#code-reviews" class="md-nav__link">
+    <span class="md-ellipsis">
+      Code reviews
+    </span>
+  </a>
+  
 </li>
         
           <li class="md-nav__item">
@@ -3802,9 +3856,9 @@
 </li>
         
           <li class="md-nav__item">
-  <a href="#credits-in-the-authors-file" class="md-nav__link">
+  <a href="#branch-naming" class="md-nav__link">
     <span class="md-ellipsis">
-      Credits in the AUTHORS file
+      Branch naming
     </span>
   </a>
   
@@ -3860,15 +3914,6 @@
     </span>
   </a>
   
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#custom-managed-runners" class="md-nav__link">
-    <span class="md-ellipsis">
-      Custom managed runners
-    </span>
-  </a>
-  
 </li>
         
           <li class="md-nav__item">
@@ -3883,39 +3928,6 @@
       </ul>
     </nav>
   
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#git-workflows" class="md-nav__link">
-    <span class="md-ellipsis">
-      Git workflows
-    </span>
-  </a>
-  
-    <nav class="md-nav" aria-label="Git workflows">
-      <ul class="md-nav__list">
-        
-          <li class="md-nav__item">
-  <a href="#setup" class="md-nav__link">
-    <span class="md-ellipsis">
-      Setup
-    </span>
-  </a>
-  
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#git-config" class="md-nav__link">
-    <span class="md-ellipsis">
-      Git config
-    </span>
-  </a>
-  
-</li>
-        
-      </ul>
-    </nav>
-  
 </li>
         
       </ul>
@@ -3960,7 +3972,64 @@ <h2 id="developer-policies">Developer policies<a class="headerlink" href="#devel
 <h3 id="code-of-conduct"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M8.048 2.241c.964-.709 2.079-1.238 3.325-1.241a4.616 4.616 0 0 1 3.282 1.355c.41.408.757.86.996 1.428.238.568.348 1.206.347 1.968 0 2.193-1.505 4.254-3.081 5.862-1.496 1.526-3.213 2.796-4.249 3.563l-.22.163a.749.749 0 0 1-.895 0l-.221-.163c-1.036-.767-2.753-2.037-4.249-3.563C1.51 10.008.007 7.952.002 5.762a4.614 4.614 0 0 1 1.353-3.407C3.123.585 6.223.537 8.048 2.24Zm-1.153.983c-1.25-1.033-3.321-.967-4.48.191a3.115 3.115 0 0 0-.913 2.335c0 1.556 1.109 3.24 2.652 4.813C5.463 11.898 6.96 13.032 8 13.805c.353-.262.758-.565 1.191-.905l-1.326-1.223a.75.75 0 0 1 1.018-1.102l1.48 1.366c.328-.281.659-.577.984-.887L9.99 9.802a.75.75 0 1 1 1.019-1.103l1.384 1.28c.295-.329.566-.661.81-.995L12.92 8.7l-1.167-1.168c-.674-.671-1.78-.664-2.474.03-.268.269-.538.537-.802.797-.893.882-2.319.843-3.185-.032-.346-.35-.693-.697-1.043-1.047a.75.75 0 0 1-.04-1.016c.162-.191.336-.401.52-.623.62-.748 1.356-1.637 2.166-2.417Zm7.112 4.442c.313-.65.491-1.293.491-1.916v-.001c0-.614-.088-1.045-.23-1.385-.143-.339-.357-.633-.673-.949a3.111 3.111 0 0 0-2.218-.915c-1.092.003-2.165.627-3.226 1.602-.823.755-1.554 1.637-2.228 2.45l-.127.154.562.566a.755.755 0 0 0 1.066.02l.794-.79c1.258-1.258 3.312-1.31 4.594-.032.396.394.792.791 1.173 1.173Z"/></svg></span> Code of conduct<a class="headerlink" href="#code-of-conduct" title="Permanent link">link</a></h3>
 <p>This project follows the
 <a href="https://github.com/openxla/community/blob/main/CODE-OF-CONDUCT.md">OpenXLA Code of Conduct</a>.</p>
+<h3 id="developer-certificate-of-origin"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"/></svg></span> Developer Certificate of Origin<a class="headerlink" href="#developer-certificate-of-origin" title="Permanent link">link</a></h3>
+<p>Contributors must certify that they wrote or otherwise have the right to submit
+the code they are contributing to the project.</p>
+<details class="quote">
+<summary>Expand to read the full DCO agreement text</summary>
+<p>By making a contribution to this project, I certify that:</p>
+<ol>
+<li>
+<p>The contribution was created in whole or in part by me and I have the
+  right to submit it under the open source license indicated in the file; or</p>
+</li>
+<li>
+<p>The contribution is based upon previous work that, to the best of my
+  knowledge, is covered under an appropriate open source license and I have
+  the right under that license to submit that work with modifications, whether
+  created in whole or in part by me, under the same open source license
+  (unless I am permitted to submit under a different license), as indicated
+  in the file; or</p>
+</li>
+<li>
+<p>The contribution was provided directly to me by some other person who
+  certified 1., 2. or 3. and I have not modified it.</p>
+</li>
+<li>
+<p>I understand and agree that this project and the contribution are public
+  and that a record of the contribution (including all personal information
+  I submit with it, including my sign-off) is maintained indefinitely and
+  may be redistributed consistent with this project or the open source
+  license(s) involved.</p>
+</li>
+</ol>
+</details>
+<p>Signing is enforced by the <a href="https://github.com/apps/dco">DCO GitHub App</a>. This
+requires that all commits included in pull requests include a <code>Signed-off-by</code>
+line:</p>
+<div class="highlight"><pre><span></span><code>This is my commit message
+
+Signed-off-by: Random J Developer &lt;random@developer.example.org&gt;
+</code></pre></div>
+<ul>
+<li>
+<p>Git will automatically append the <code>Signed-off-by</code> line if you use the <code>-s</code> option:</p>
+<div class="highlight"><pre><span></span><code>git<span class="w"> </span>commit<span class="w"> </span>-s<span class="w"> </span>-m<span class="w"> </span><span class="s1">&#39;This is my commit message&#39;</span>
+</code></pre></div>
+</li>
+<li>
+<p>Users of <a href="https://code.visualstudio.com/">Visual Studio Code</a> can add
+  <code>"git.alwaysSignOff": true,</code> in their settings.</p>
+</li>
+<li>
+<p>For more information about DCO enforcement and git workflows, see the
+  <a href="https://github.com/dcoapp/app">dcoapp/app</a> repository.</p>
+</li>
+</ul>
 <h3 id="contributor-license-agreement"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"/></svg></span> Contributor License Agreement<a class="headerlink" href="#contributor-license-agreement" title="Permanent link">link</a></h3>
+<div class="admonition info">
+<p class="admonition-title">CLA is being replaced with DCO. Both are enabled while we migrate.</p>
+</div>
 <p>Contributions to this project must be accompanied by a Contributor License
 Agreement (CLA). Head over to <a href="https://cla.developers.google.com/">https://cla.developers.google.com/</a> to see
 your current agreements on file or to sign a new one.</p>
@@ -3972,6 +4041,26 @@ <h3 id="contributor-license-agreement"><span class="twemoji"><svg xmlns="http://
   one (even if it was for a different project), you probably don't need to do it
   again.</li>
 </ul>
+<h3 id="authors-codeowners-and-maintainers"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M2 5.5a3.5 3.5 0 1 1 5.898 2.549 5.508 5.508 0 0 1 3.034 4.084.75.75 0 1 1-1.482.235 4 4 0 0 0-7.9 0 .75.75 0 0 1-1.482-.236A5.507 5.507 0 0 1 3.102 8.05 3.493 3.493 0 0 1 2 5.5ZM11 4a3.001 3.001 0 0 1 2.22 5.018 5.01 5.01 0 0 1 2.56 3.012.749.749 0 0 1-.885.954.752.752 0 0 1-.549-.514 3.507 3.507 0 0 0-2.522-2.372.75.75 0 0 1-.574-.73v-.352a.75.75 0 0 1 .416-.672A1.5 1.5 0 0 0 11 5.5.75.75 0 0 1 11 4Zm-5.5-.5a2 2 0 1 0-.001 3.999A2 2 0 0 0 5.5 3.5Z"/></svg></span> AUTHORS, CODEOWNERS, and MAINTAINERS<a class="headerlink" href="#authors-codeowners-and-maintainers" title="Permanent link">link</a></h3>
+<p>The <a href="https://github.com/iree-org/iree/blob/main/AUTHORS"><code>AUTHORS</code> file</a> keeps
+track of those who have made significant contributions to the project.</p>
+<ul>
+<li>If you would like additional recognition for your contributions, you may add
+  yourself or your organization (please add the entity who owns the copyright
+  for your contributions).</li>
+<li>The source control history remains the most accurate source for individual
+  contributions.</li>
+</ul>
+<p>The
+<a href="https://github.com/iree-org/iree/blob/main/.github/CODEOWNERS"><code>.github/CODEOWNERS</code> file</a>
+lets maintainers opt in to reviewing PRs that modify certain paths.</p>
+<ul>
+<li>Review is not required from a code owner, though it is recommended.</li>
+</ul>
+<p>The
+<a href="https://github.com/iree-org/iree/blob/main/MAINTAINERS.md"><code>MAINTAINERS.md</code> file</a>
+documents official maintainers for project components.</p>
+<h2 id="coding-policies"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Coding policies<a class="headerlink" href="#coding-policies" title="Permanent link">link</a></h2>
 <h3 id="coding-style-guidelines"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M11.013 1.427a1.75 1.75 0 0 1 2.474 0l1.086 1.086a1.75 1.75 0 0 1 0 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 0 1-.927-.928l.929-3.25c.081-.286.235-.547.445-.758l8.61-8.61Zm.176 4.823L9.75 4.81l-6.286 6.287a.253.253 0 0 0-.064.108l-.558 1.953 1.953-.558a.253.253 0 0 0 .108-.064Zm1.238-3.763a.25.25 0 0 0-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 0 0 0-.354Z"/></svg></span> Coding style guidelines<a class="headerlink" href="#coding-style-guidelines" title="Permanent link">link</a></h3>
 <p>Most of the code style is derived from the
 <a href="http://google.github.io/styleguide/">Google Style Guides</a> for the appropriate
@@ -3992,6 +4081,12 @@ <h3 id="coding-style-guidelines"><span class="twemoji"><svg xmlns="http://www.w3
 <a href="https://github.com/iree-org/iree/blob/main/build_tools/scripts/lint.sh"><code>build_tools/scripts/lint.sh</code></a>
 can also be used to run the full suite of lint checks.</p>
 </details>
+<h3 id="testing-policy"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M7 2v2h1v14a4 4 0 0 0 4 4 4 4 0 0 0 4-4V4h1V2H7m4 14c-.6 0-1-.4-1-1s.4-1 1-1 1 .4 1 1-.4 1-1 1m2-4c-.6 0-1-.4-1-1s.4-1 1-1 1 .4 1 1-.4 1-1 1m1-5h-4V4h4v3Z"/></svg></span> Testing policy<a class="headerlink" href="#testing-policy" title="Permanent link">link</a></h3>
+<p>With few exceptions, features should be accompanied by automated tests.</p>
+<p>We use a mix of in-tree and out-of-tree unit and integration tests. For more
+information about the types of tests used across the project, refer to the
+<a href="../testing-guide/">testing guide</a>.</p>
+<h2 id="github-policies"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"/></svg></span> GitHub policies<a class="headerlink" href="#github-policies" title="Permanent link">link</a></h2>
 <h3 id="code-reviews"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 13H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 14.543V13H1.75A1.75 1.75 0 0 1 0 11.25v-8.5C0 1.784.784 1 1.75 1ZM1.5 2.75v8.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-8.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Zm5.28 1.72a.75.75 0 0 1 0 1.06L5.31 7l1.47 1.47a.751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018l-2-2a.75.75 0 0 1 0-1.06l2-2a.75.75 0 0 1 1.06 0Zm2.44 0a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L10.69 7 9.22 5.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Code reviews<a class="headerlink" href="#code-reviews" title="Permanent link">link</a></h3>
 <p>All submissions, including submissions by maintainers, require review. We
 use GitHub pull requests (PRs) for this purpose. Consult
@@ -4005,11 +4100,6 @@ <h3 id="code-reviews"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/s
   working for a certain situation, please ask as we bias towards pragmatism for
   cases that require it.</li>
 </ul>
-<h3 id="testing-policy"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M7 2v2h1v14a4 4 0 0 0 4 4 4 4 0 0 0 4-4V4h1V2H7m4 14c-.6 0-1-.4-1-1s.4-1 1-1 1 .4 1 1-.4 1-1 1m2-4c-.6 0-1-.4-1-1s.4-1 1-1 1 .4 1 1-.4 1-1 1m1-5h-4V4h4v3Z"/></svg></span> Testing policy<a class="headerlink" href="#testing-policy" title="Permanent link">link</a></h3>
-<p>With few exceptions, features should be accompanied by automated tests.</p>
-<p>We use a mix of in-tree and out-of-tree unit and integration tests. For more
-information about the types of tests used across the project, refer to the
-<a href="../testing-guide/">testing guide</a>.</p>
 <h3 id="github-actions-workflows"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M.41 13.41 6 19l1.41-1.42L1.83 12m20.41-6.42L11.66 16.17 7.5 12l-1.43 1.41L11.66 19l12-12M18 7l-1.41-1.42-6.35 6.35 1.42 1.41L18 7Z"/></svg></span> GitHub Actions workflows<a class="headerlink" href="#github-actions-workflows" title="Permanent link">link</a></h3>
 <p>We use <a href="https://docs.github.com/en/actions">GitHub Actions</a> to automatically
 build and test various parts of the project.</p>
@@ -4017,8 +4107,9 @@ <h3 id="github-actions-workflows"><span class="twemoji"><svg xmlns="http://www.w
 <li>Most presubmit workflows will only run automatically on PRs if you are a
   project collaborator. Otherwise a maintainer must
   <a href="https://docs.github.com/en/actions/managing-workflow-runs/approving-workflow-runs-from-public-forks">approve workflow runs</a>.
-  If you are sending code changes to the project, please ask to be added as a
-  collaborator, so that these can run automatically.</li>
+  If you are sending code changes to the project, please
+  <a href="#obtaining-commit-access">request commit access</a>, so that these can run
+  automatically.</li>
 <li>It is generally expected that PRs will only be merged when all checks are
   passing. In some cases, pre-existing failures may be bypassed by a maintainer.</li>
 </ul>
@@ -4028,9 +4119,6 @@ <h3 id="github-actions-workflows"><span class="twemoji"><svg xmlns="http://www.w
 <a href="#ci-behavior-manipulation">CI behavior manipulation</a> section below to
 learn how to customize this behavior.</p>
 </details>
-<!-- TODO(scotttodd): link to infrastructure / CI docs when they exist -->
-<!-- TODO(scotttodd): link to "obtaining commit access" -->
-
 <h3 id="merging-approved-changes"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M1.5 3.25a2.25 2.25 0 1 1 3 2.122v5.256a2.251 2.251 0 1 1-1.5 0V5.372A2.25 2.25 0 0 1 1.5 3.25Zm5.677-.177L9.573.677A.25.25 0 0 1 10 .854V2.5h1A2.5 2.5 0 0 1 13.5 5v5.628a2.251 2.251 0 1 1-1.5 0V5a1 1 0 0 0-1-1h-1v1.646a.25.25 0 0 1-.427.177L7.177 3.427a.25.25 0 0 1 0-.354ZM3.75 2.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm0 9.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm8.25.75a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Z"/></svg></span> Merging approved changes<a class="headerlink" href="#merging-approved-changes" title="Permanent link">link</a></h3>
 <p>After review and presubmit checks, PRs should typically be merged using
 "squash and merge".</p>
@@ -4082,18 +4170,40 @@ <h3 id="obtaining-commit-access"><span class="twemoji"><svg xmlns="http://www.w3
 <a href="https://github.com/shark-infra/">shark-infra</a>. Reach out to a project
 member if you would also like access to repositories in those organizations.</p>
 </div>
-<h3 id="credits-in-the-authors-file"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M2 5.5a3.5 3.5 0 1 1 5.898 2.549 5.508 5.508 0 0 1 3.034 4.084.75.75 0 1 1-1.482.235 4 4 0 0 0-7.9 0 .75.75 0 0 1-1.482-.236A5.507 5.507 0 0 1 3.102 8.05 3.493 3.493 0 0 1 2 5.5ZM11 4a3.001 3.001 0 0 1 2.22 5.018 5.01 5.01 0 0 1 2.56 3.012.749.749 0 0 1-.885.954.752.752 0 0 1-.549-.514 3.507 3.507 0 0 0-2.522-2.372.75.75 0 0 1-.574-.73v-.352a.75.75 0 0 1 .416-.672A1.5 1.5 0 0 0 11 5.5.75.75 0 0 1 11 4Zm-5.5-.5a2 2 0 1 0-.001 3.999A2 2 0 0 0 5.5 3.5Z"/></svg></span> Credits in the AUTHORS file<a class="headerlink" href="#credits-in-the-authors-file" title="Permanent link">link</a></h3>
-<p>If you would like additional recognition for your contributions, you may add
-yourself or your organization to the
-<a href="https://github.com/iree-org/iree/blob/main/AUTHORS">AUTHORS file</a> that keeps
-track of those who have made significant contributions to the project.</p>
-<ul>
-<li>Please add the entity who owns the copyright for your contribution.</li>
-<li>The source control history remains the most accurate source for individual
-  contributions.</li>
-</ul>
-<!-- TODO(scotttodd): merge the sections below into "developer overview"? -->
-
+<h3 id="branch-naming"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"/></svg></span> Branch naming<a class="headerlink" href="#branch-naming" title="Permanent link">link</a></h3>
+<p>Most work should be done on
+<a href="https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks">repository forks</a>.
+For developers with write access, when creating a branch in the common
+<a href="https://github.com/iree-org/iree">iree-org/iree repository</a>, please follow
+these naming guidelines:</p>
+<table>
+<thead>
+<tr>
+<th>Branch type</th>
+<th>Naming scheme</th>
+<th>Example</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Single user</td>
+<td><code>users/[username]/*</code></td>
+<td><code>users/cooldeveloper/my-awesome-feature</code></td>
+</tr>
+<tr>
+<td>Shared feature branch</td>
+<td><code>shared/*</code></td>
+<td><code>shared/pytorch-performance-sprint</code></td>
+</tr>
+<tr>
+<td>Dependency updates</td>
+<td><code>integrates/*</code></td>
+<td><code>integrates/integrate-llvm-20240501</code></td>
+</tr>
+</tbody>
+</table>
+<p>Branches that do not meet these guidelines may be deleted, especially if
+they <a href="https://github.com/iree-org/iree/branches/stale">appear to be stale</a>.</p>
 <h2 id="tips-for-contributors">Tips for contributors<a class="headerlink" href="#tips-for-contributors" title="Permanent link">link</a></h2>
 <h3 id="tool-recommendations">Tool recommendations<a class="headerlink" href="#tool-recommendations" title="Permanent link">link</a></h3>
 <table>
@@ -4144,10 +4254,6 @@ <h4 id="self-hosted-runners">Self-hosted runners<a class="headerlink" href="#sel
 custom configurations such as accelerators. Configuration scripting is checked
 in to this repository (see the
 <a href="https://github.com/iree-org/iree/blob/main/build_tools/github_actions/runner/README.md">README for that directory</a>).</p>
-<h4 id="custom-managed-runners">Custom managed runners<a class="headerlink" href="#custom-managed-runners" title="Permanent link">link</a></h4>
-<p>In addition to our self-hosted runners, we use GitHub's
-<a href="https://docs.github.com/en/actions/using-github-hosted-runners/about-larger-runners">large managed runners</a>
-for some platforms.</p>
 <h4 id="ci-behavior-manipulation">CI behavior manipulation<a class="headerlink" href="#ci-behavior-manipulation" title="Permanent link">link</a></h4>
 <p>The setup step of the CI determines which CI jobs to run. This is controlled by
 the
@@ -4251,99 +4357,6 @@ <h5 id="ci-configuration-recipes">CI configuration recipes<a class="headerlink"
 <p><img alt="ci-extra" src="../contributing-ci-extra.png" /></p>
 <p>The enabled jobs can be viewed from the Summary page of an action run:</p>
 <p><img alt="ci_enabled_jobs" src="../contributing-ci-enabled-jobs.png" /></p>
-<h3 id="git-workflows">Git workflows<a class="headerlink" href="#git-workflows" title="Permanent link">link</a></h3>
-<p>We tend to use the "triangular" or "forking" workflow. Develop primarily on a
-clone of the repository on your development machine. Any local branches named
-the same as persistent branches from the
-<a href="https://github.com/iree-org/iree">main repository</a> are pristine (though
-potentially stale) copies. You only fastforward these to match upstream and
-otherwise do development on other branches. When sending PRs, you push to a
-different branch on your public fork and create the PR from there.</p>
-<!-- TODO(scotttodd): screenshots / diagrams here
-  (https://mermaid.js.org/syntax/gitgraph.html?) -->
-
-<h4 id="setup">Setup<a class="headerlink" href="#setup" title="Permanent link">link</a></h4>
-<ol>
-<li>
-<p>Create a fork of the main repository.</p>
-</li>
-<li>
-<p>Create a local git repository with remotes <code>upstream</code> (the main repository)
-    and <code>origin</code> (your personal fork). To list your current remotes, run
-    <code>git remote -v</code>.</p>
-<p>a. If you already cloned from the main repository (e.g. by following the
-getting started guide):</p>
-<div class="highlight"><pre><span></span><code><span class="c1"># From your existing git repo</span>
-$<span class="w"> </span>git<span class="w"> </span>remote<span class="w"> </span>rename<span class="w"> </span>origin<span class="w"> </span>upstream
-$<span class="w"> </span>git<span class="w"> </span>remote<span class="w"> </span>add<span class="w"> </span>origin<span class="w"> </span>https://github.com/&lt;github_username&gt;/iree.git
-</code></pre></div>
-<p>b. If you haven't already cloned:</p>
-<div class="highlight"><pre><span></span><code><span class="c1"># From whatever directory under which you want to nest your repo</span>
-$<span class="w"> </span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/&lt;github_username&gt;/iree.git
-$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>iree
-$<span class="w"> </span>git<span class="w"> </span>remote<span class="w"> </span>add<span class="w"> </span>upstream<span class="w"> </span>https://github.com/iree-org/iree.git
-</code></pre></div>
-<p>This is especially important for maintainers who have write access (so can
-push directly to the main repository) and admins who have elevated
-privileges (so can push directly to protected branches).</p>
-<p>These names are just suggestions, but you might find some scripts where the
-defaults are for remotes named like this.</p>
-<p>For extra safety, you can make it difficult to push directly to upstream by
-setting the push url to something invalid:
-<code>git remote set-url --push upstream DISABLE</code>, which requires re-enabling the
-push URL explicitly before pushing. You can wrap this behavior in a custom
-git command like
-<a href="https://gist.github.com/GMNGeoffrey/42dd9a9792390094a43bdb69659320c0">git-sudo</a>.</p>
-</li>
-<li>
-<p>Use a script like
-    <a href="https://github.com/iree-org/iree/blob/main/build_tools/scripts/git/git_update.sh">git_update.sh</a>
-    to easily synchronize <code>main</code> with <code>upstream</code>. Submodules make this a
-    little trickier than it should be. You can also turn this into a git command
-    by adding it to your path as <code>git-update</code>.</p>
-</li>
-</ol>
-<h4 id="git-config">Git config<a class="headerlink" href="#git-config" title="Permanent link">link</a></h4>
-<p>These are some additional options you could put in your top-level <code>.gitconfig</code>
-or repository-specific <code>.git/config</code> files that are conducive to the recommended
-workflow:</p>
-<!-- TODO(scotttodd): move to auto-collapsed tip -->
-
-<div class="highlight"><pre><span></span><code><span class="k">[push]</span>
-<span class="w">  </span><span class="na">default</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">current</span>
-<span class="k">[alias]</span>
-<span class="w">  </span><span class="c1"># Delete branches that you pushed and have been deleted upstream, e.g. because</span>
-<span class="w">  </span><span class="c1"># the PR was merged.</span>
-<span class="w">  </span><span class="na">gone</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">! &quot;git fetch -p  &amp;&amp; git for-each-ref --format &#39;%(refname:short) %(upstream:track)&#39; | awk &#39;$2 == \&quot;[gone]\&quot; {print $1}&#39; | xargs -r git branch -D&quot;</span>
-<span class="w">  </span><span class="c1"># Update from upstream (custom command) and delete obsolete local branches.</span>
-<span class="w">  </span><span class="na">sync</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">! (git update main &amp;&amp; git gone)</span>
-<span class="w">  </span><span class="c1"># Create a new branch based off of main (requires a clean working directory).</span>
-<span class="w">  </span><span class="na">new</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;!f(){ \\\ngit checkout main &amp;&amp; git switch -c $1</span><span class="c1">; \\\n}; f&quot;</span>
-<span class="w">  </span><span class="c1"># Display branches in a useful &quot;latest last&quot; format</span>
-<span class="w">  </span><span class="na">br</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">for-each-ref --sort=committerdate refs/heads/ --format=&#39;%(HEAD) %(color:yellow)%(refname:short)%(color:reset) - %(color:red)%(objectname:short)%(color:reset) - %(contents:subject) (%(color:green)%(committerdate:relative)%(color:reset))&#39;</span>
-<span class="w">  </span><span class="c1"># `git git foo` -&gt; `git foo` typo fixer</span>
-<span class="w">  </span><span class="na">git</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;!f(){ \\\n git \&quot;$@\&quot;</span><span class="c1">; \\\n}; f&quot;</span>
-<span class="w">  </span><span class="c1"># Get the git root directory</span>
-<span class="w">  </span><span class="na">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">rev-parse --show-toplevel</span>
-<span class="w">  </span><span class="c1"># checkout, but also sync submodules</span>
-<span class="w">  </span><span class="na">ch</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;!f() { \\\n git checkout \&quot;$@\&quot;</span><span class="c1">; git submodule sync &amp;&amp; git submodule update --init; \\\n}; f&quot;</span>
-<span class="w">  </span><span class="c1"># See the diff for a PR branch vs the main branch</span>
-<span class="w">  </span><span class="na">diffmain</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">diff --merge-base main</span>
-<span class="w">  </span><span class="c1"># See only the files that differ vs the main branch</span>
-<span class="w">  </span><span class="na">whatsout</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">diffmain --name-only</span>
-<span class="k">[checkout]</span>
-<span class="w">  </span><span class="c1"># If the checkout command is ambiguous across remotes, prefer this one.</span>
-<span class="w">  </span><span class="na">defaultRemote</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">origin</span>
-<span class="k">[pull]</span>
-<span class="w">  </span><span class="c1"># When pulling, only complete the pull if it&#39;s a clean fast-forward.</span>
-<span class="w">  </span><span class="na">ff</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">only</span>
-<span class="k">[remote]</span>
-<span class="w">  </span><span class="c1"># Push to your fork (origin) by default</span>
-<span class="w">  </span><span class="na">pushDefault</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">origin</span>
-<span class="k">[url &quot;ssh://git@github.com/&quot;]</span>
-<span class="w">  </span><span class="c1"># Pull with https (so no auth required), but push with ssh.</span>
-<span class="w">  </span><span class="na">pushInsteadOf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">https://github.com/</span>
-</code></pre></div>
 
 
 
diff --git a/search/search_index.json b/search/search_index.json
index bcf3e4cb21c8..83f5a95f90c7 100755
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"IREE","text":"<p>IREE (Intermediate Representation Execution Environment<sup>1</sup>) is an MLIR-based end-to-end compiler and runtime that lowers Machine Learning (ML) models to a unified IR that scales up to meet the needs of the datacenter and down to satisfy the constraints and special considerations of mobile and edge deployments.</p>"},{"location":"#key-features","title":"Key features","text":"<ul> <li> <p> Ahead-of-time compilation</p> <p>Scheduling and execution logic are compiled together</p> <p> Project architecture</p> </li> <li> <p> Support for advanced model features</p> <p>Dynamic shapes, flow control, streaming, and more</p> <p> Importing from ML frameworks</p> </li> <li> <p> Designed for CPUs, GPUs, and other accelerators</p> <p>First class support for many popular devices and APIs</p> <p> Deployment configurations</p> </li> <li> <p> Low overhead, pipelined execution</p> <p>Efficient power and resource usage on server and edge devices</p> <p> Benchmarking</p> </li> <li> <p> Binary size as low as 30KB on embedded systems</p> <p> Running on bare-metal</p> </li> <li> <p> Debugging and profiling support</p> <p> Profiling with Tracy</p> </li> </ul>"},{"location":"#support-matrix","title":"Support matrix","text":"<p>IREE supports importing from a variety of ML frameworks:</p> <ul> <li> JAX</li> <li> ONNX</li> <li> PyTorch</li> <li> TensorFlow and TensorFlow Lite</li> </ul> <p>The IREE compiler tools run on  Linux,  Windows, and  macOS and can generate efficient code for a variety of runtime platforms:</p> <ul> <li> Linux</li> <li> Windows</li> <li> macOS</li> <li> Android</li> <li> iOS</li> <li> Bare metal</li> <li> WebAssembly (experimental)</li> </ul> <p>and architectures:</p> <ul> <li> ARM</li> <li> x86</li> <li> RISC-V</li> </ul> <p>Support for hardware accelerators and APIs is also included:</p> <ul> <li> Vulkan</li> <li> CUDA</li> <li> 
ROCm</li> <li> Metal (for Apple silicon devices)</li> <li> AMD AIE (experimental)</li> <li> WebGPU (experimental)</li> </ul>"},{"location":"#project-architecture","title":"Project architecture","text":"<p>IREE adopts a holistic approach towards ML model compilation: the IR produced contains both the scheduling logic, required to communicate data dependencies to low-level parallel pipelined hardware/API like Vulkan, and the execution logic, encoding dense computation on the hardware in the form of hardware/API-specific binaries like SPIR-V.</p> <p> </p>"},{"location":"#workflow-overview","title":"Workflow overview","text":"<p>Using IREE involves the following general steps:</p> <ol> <li> <p>Import your model</p> <p>Develop your program using one of the supported frameworks, then import into IREE</p> </li> <li> <p>Select your deployment configuration</p> <p>Identify your target platform, accelerator(s), and other constraints</p> </li> <li> <p>Compile your model</p> <p>Compile through IREE, picking settings based on your deployment configuration</p> </li> <li> <p>Run your model</p> <p>Use IREE's runtime components to execute your compiled model</p> </li> </ol>"},{"location":"#importing-models-from-ml-frameworks","title":"Importing models from ML frameworks","text":"<p>IREE supports importing models from a growing list of ML frameworks and model formats:</p> <ul> <li> JAX</li> <li> ONNX</li> <li> PyTorch</li> <li> TensorFlow and    TensorFlow Lite</li> </ul>"},{"location":"#selecting-deployment-configurations","title":"Selecting deployment configurations","text":"<p>IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE for dynamic model deployments taking advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.</p> <ul> <li>What platforms are you targeting? Desktop? Mobile? 
An embedded system?</li> <li>What hardware should the bulk of your model run on? CPU? GPU?</li> <li>How fixed is your model itself? Can the weights be changed? Do you want   to support loading different model architectures dynamically?</li> </ul> <p>IREE supports the full set of these configurations using the same underlying technology.</p>"},{"location":"#compiling-models","title":"Compiling models","text":"<p>Model compilation is performed ahead-of-time on a host machine for any combination of targets. The compilation process converts from layers and operators used by high level frameworks down into optimized native code and associated scheduling logic.</p> <p>For example, compiling for GPU execution using Vulkan generates SPIR-V kernels and Vulkan API calls. For CPU execution, native code with static or dynamic linkage and the associated function calls are generated.</p>"},{"location":"#running-models","title":"Running models","text":"<p>IREE offers a low level C API, as well as several sets of API bindings for compiling and running programs using various languages.</p>"},{"location":"#communication-channels","title":"Communication channels","text":"<ul> <li> GitHub issues: Feature requests,   bugs, and other work tracking</li> <li> IREE Discord server: Daily development   discussions with the core team and collaborators</li> <li> iree-discuss email list:   Announcements, general and low-priority discussion</li> </ul>"},{"location":"#roadmap","title":"Roadmap","text":"<p>IREE is in the early stages of development and is not yet ready for broad adoption. 
We use both GitHub Projects and GitHub Milestones to track progress.</p> <ol> <li> <p>Pronounced \"eerie\" and often styled with the   emoji\u00a0\u21a9</p> </li> </ol>"},{"location":"building-from-source/","title":"Building from source","text":"<p>While IREE does offer binary distributions for its compiler tools and Python bindings, building from source is still useful when using IREE's runtime or when making changes to the compiler or import tools themselves.</p>"},{"location":"building-from-source/#reference-pages","title":"Reference pages","text":"<ul> <li>Getting started</li> <li>Android cross-compilation</li> <li>iOS cross-compilation</li> <li>RISC-V cross-compilation</li> </ul>"},{"location":"building-from-source/android/","title":"Android cross-compilation","text":"<p>Running on a platform like Android involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific Android version and system architecture):</p> <ul> <li>IREE's compiler is built on the host and is used there to generate modules   for the target</li> <li>IREE's runtime is built on the host for the target. The runtime is then   either pushed to the target to run natively or is bundled into an Android   APK</li> </ul>","tags":["Android"]},{"location":"building-from-source/android/#prerequisites","title":"Prerequisites","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-environment-setup","title":"Host environment setup","text":"<p>You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.</p>","tags":["Android"]},{"location":"building-from-source/android/#install-android-ndk-and-adb","title":"Install Android NDK and ADB","text":"<p>The Android Native Developer Kit (NDK) is needed to use native C/C++ code on Android. 
You can download it here, or, if you have installed Android Studio, you can follow this guide instead.</p> <p>Note</p> <p>Make sure the <code>ANDROID_NDK</code> environment variable is set after installing the NDK.</p> <p>ADB (the Android Debug Bridge) is also needed to communicate with Android devices from the command line. Install it following the official user guide.</p>","tags":["Android"]},{"location":"building-from-source/android/#configure-and-build","title":"Configure and build","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-configuration","title":"Host configuration","text":"<p>Build and install on your host machine:</p> <pre><code>cmake -GNinja -B ../iree-build/ \\\n  -DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  .\ncmake --build ../iree-build/ --target install\n</code></pre>","tags":["Android"]},{"location":"building-from-source/android/#target-configuration","title":"Target configuration","text":"<p>Build the runtime using the Android NDK toolchain:</p>  Linux macOS Windows <pre><code>cmake -GNinja -B ../iree-build-android/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n  -DANDROID_ABI=\"arm64-v8a\" \\\n  -DANDROID_PLATFORM=\"android-29\" \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\ncmake --build ../iree-build-android/\n</code></pre> <pre><code>cmake -GNinja -B ../iree-build-android/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n  -DANDROID_ABI=\"arm64-v8a\" \\\n  -DANDROID_PLATFORM=\"android-29\" \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\ncmake --build ../iree-build-android/\n</code></pre> <pre><code>cmake -GNinja -B ../iree-build-android/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"%ANDROID_NDK%/build/cmake/android.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=\"%CD%/../iree-build/install/bin\" 
\\\n  -DANDROID_ABI=\"arm64-v8a\" \\\n  -DANDROID_PLATFORM=\"android-29\" \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\ncmake --build ../iree-build-android/\n</code></pre> <p>Note</p> <p>See the Android NDK CMake guide and Android Studio CMake guide for details on configuring CMake for Android.</p> <p>The specific <code>ANDROID_ABI</code> and <code>ANDROID_PLATFORM</code> used should match your target device.</p>","tags":["Android"]},{"location":"building-from-source/android/#running-android-tests","title":"Running Android tests","text":"<p>Make sure you enable developer options and USB debugging on your Android device and can see your it when you run <code>adb devices</code>, then run all tests through ctest:</p> <pre><code># Build test dependencies\ncmake --build ../iree-build-android/ --target iree-test-deps\n\n# Ensure that your Android device is visible\nadb devices\n\n# Run tests\nctest --test-dir ../iree-build-android/ --output-on-failure\n</code></pre> <p>This will automatically upload build artifacts to the connected Android device, run the tests, then report the status back to your host machine.</p>","tags":["Android"]},{"location":"building-from-source/android/#running-tools-directly","title":"Running tools directly","text":"<p>Invoke the host compiler tools to produce a bytecode module FlatBuffer:</p> <pre><code>../iree-build/install/bin/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre> <p>Push the Android runtime tools to the device, along with any FlatBuffer files:</p> <pre><code>adb push ../iree-build-android/tools/iree-run-module /data/local/tmp/\nadb shell chmod +x /data/local/tmp/iree-run-module\nadb push /tmp/simple_abs_vmvx.vmfb /data/local/tmp/\n</code></pre> <p>Run the tool:</p> <pre><code>adb shell /data/local/tmp/iree-run-module --device=local-task \\\n  --module=/data/local/tmp/simple_abs_vmvx.vmfb \\\n  --function=abs \\\n  
--input=\"f32=-5\"\n</code></pre>","tags":["Android"]},{"location":"building-from-source/getting-started/","title":"Getting started","text":""},{"location":"building-from-source/getting-started/#prerequisites","title":"Prerequisites","text":"<p>IREE can be built from source using CMake. We also recommend the Ninja CMake generator and the clang or MSVC C/C++ compilers.</p> Note - Other CMake generators and compilers <p>IREE developers and CIs primarily use Ninja, clang, and MSVC. Other configurations (including the Makefile generator and gcc) are \"best effort\". Patches to improve support are always welcome.</p>  Linux macOS Windows <ol> <li> <p>Install a compiler/linker (typically \"clang\" and \"lld\" package)</p> </li> <li> <p>Install CMake (typically \"cmake\" package)</p> </li> <li> <p>Install Ninja (typically \"ninja-build\"    package)</p> </li> </ol> <p>On Debian/Ubuntu:</p> <pre><code>sudo apt install cmake ninja-build clang lld\n</code></pre> <ol> <li> <p>Install CMake</p> </li> <li> <p>Install Ninja</p> </li> </ol> <p>If using Homebrew:</p> <pre><code>brew install cmake ninja\n</code></pre> <ol> <li> <p>Install MSVC from Visual Studio or \"Tools for Visual Studio\" on the    official downloads page</p> </li> <li> <p>Install CMake from the    official downloads page</p> </li> <li> <p>Install Ninja from the official site</p> </li> </ol> <p>Note</p> <p>Initialize MSVC by running <code>vcvarsall.bat</code> to build on the command line. 
See the official documentation for details.</p>"},{"location":"building-from-source/getting-started/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":"<p>Use Git to clone the IREE repository and initialize its submodules:</p> <pre><code>git clone https://github.com/iree-org/iree.git\ncd iree\ngit submodule update --init\n</code></pre> <p>The most basic CMake workflow is:</p> <pre><code># Configure\ncmake -G Ninja -B ../iree-build/ .\n\n# Build\ncmake --build ../iree-build/\n</code></pre> <p>Caution - slow builds</p> <p>The compiler build is complex. You will want a powerful machine and to tune the settings following the next section. In 2023, we've seen builds take around 5-10 minutes on 64-core Linux machines.</p> <p>Use case permitting, disabling the compiler build with <code>-DIREE_BUILD_COMPILER=OFF</code> will drastically simplify the build.</p>"},{"location":"building-from-source/getting-started/#configuration-settings","title":"Configuration settings","text":"<p>The configure step should be customized for your build environment. These settings can improve compile and link times substantially.</p>  Linux macOS Windows <pre><code># Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_ASSERTIONS=ON \\\n    -DIREE_ENABLE_SPLIT_DWARF=ON \\\n    -DIREE_ENABLE_THIN_ARCHIVES=ON \\\n    -DCMAKE_C_COMPILER=clang \\\n    -DCMAKE_CXX_COMPILER=clang++ \\\n    -DIREE_ENABLE_LLD=ON\n</code></pre> <pre><code># Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . 
\\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_ASSERTIONS=ON \\\n    -DIREE_ENABLE_SPLIT_DWARF=ON \\\n    -DCMAKE_C_COMPILER=clang \\\n    -DCMAKE_CXX_COMPILER=clang++ \\\n    -DIREE_ENABLE_LLD=ON\n</code></pre> <p>It is also possible to add <code>-DIREE_ENABLE_THIN_ARCHIVES=ON</code> if the <code>CMAKE_AR</code> variable is defined and points to the path of either the GNU binutils or LLVM <code>ar</code> program, overriding the default Apple <code>ar</code>.</p> <pre><code># Recommended development options:\ncmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_ASSERTIONS=ON\n</code></pre> Tip - CMAKE_BUILD_TYPE values <p>We recommend using the <code>RelWithDebInfo</code> build type by default for a good balance of debug info and performance. The <code>Debug</code>, <code>Release</code>, and <code>MinSizeRel</code> build types are useful in more specific cases. Note that several useful LLVM debugging features are only available in <code>Debug</code> builds. See the official CMake documentation for general details.</p> Tip - Faster recompilation with ccache <p>We recommend using <code>ccache</code> with CMake, especially when rebuilding the compiler. 
To use it, configure CMake with:</p> <pre><code>-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache\n</code></pre> <p>See also our developer documentation for ccache.</p>"},{"location":"building-from-source/getting-started/#optional-components","title":"Optional components","text":"<p>By default, the CMake build includes:</p> <ul> <li>All compiler targets (<code>llvm-cpu</code>, <code>cuda</code>, <code>vulkan-spirv</code>, etc.)</li> <li>All runtime HAL drivers (<code>local-task</code>, <code>cuda</code>, <code>vulkan</code>, etc.)</li> <li>All compiler input formats (StableHLO, TOSA, etc.)</li> <li>All compiler output formats (VM bytecode, C)</li> </ul> <p>The default build does not include:</p> <ul> <li>Compiler or runtime bindings (Python, TFLite, etc.)</li> <li>Advanced features like AddressSanitizer or tracing instrumentation</li> <li>Experimental components</li> </ul> <p>These can be changed via the <code>IREE_</code> CMake options listed in the root <code>CMakeLists.txt</code>.</p>"},{"location":"building-from-source/getting-started/#extensions-and-integrations","title":"Extensions and integrations","text":"<p>When using IREE within other projects, you can register compiler plugins and runtime HAL drivers. You can also bring your own copy of LLVM and some other tools. See the root <code>CMakeLists.txt</code> for details.</p>"},{"location":"building-from-source/getting-started/#tests-and-samples","title":"Tests and samples","text":""},{"location":"building-from-source/getting-started/#running-tests","title":"Running tests","text":"<p>Tests are run via ctest. 
To build and run the core project tests:</p> <pre><code># Build default targets\ncmake --build ../iree-build/\n\n# Run tests\nctest --test-dir ../iree-build/\n</code></pre> <p>Caution</p> <p>This has two limitations:</p> <ol> <li>Large tests are excluded from the build by default</li> <li>Some tests require hardware like a GPU and will fail on unsupported systems</li> </ol> <p>To build and then run all tests:</p> <pre><code># 1. Build default targets\ncmake --build ../iree-build/\n\n# 2. Build test dependencies\ncmake --build ../iree-build/ --target iree-test-deps\n\n# 3. Run tests\nctest --test-dir ../iree-build/\n\n\n# Or combine all steps using a utility target\ncmake --build ../iree-build --target iree-run-tests\n</code></pre> <p>To run only certain tests, we have a helper script that converts environment variables into ctest filters:</p> <pre><code># Run default tests\n./build_tools/cmake/ctest_all.sh ../iree-build\n\n# Run tests, turning CUDA on and Vulkan off\nexport IREE_CUDA_DISABLE=0\nexport IREE_VULKAN_DISABLE=1\n./build_tools/cmake/ctest_all.sh ../iree-build\n</code></pre>"},{"location":"building-from-source/getting-started/#running-samples","title":"Running samples","text":"<pre><code># Build\ncmake --build ../iree-build/\n\n# Run a standalone sample application\n../iree-build/runtime/src/iree/runtime/demo/hello_world_embedded\n# 4xf32=1 1.1 1.2 1.3\n#  *\n# 4xf32=10 100 1000 10000\n#  =\n# 4xf32=10 110 1200 13000\n\n# Try out the developer tools\nls ../iree-build/tools/\n../iree-build/tools/iree-compile --help\n../iree-build/tools/iree-run-module --help\n</code></pre>"},{"location":"building-from-source/getting-started/#python-bindings","title":"Python bindings","text":"<p>Python packages can either be built from source or installed from our releases. 
See the Python bindings page for details about the bindings themselves.</p>"},{"location":"building-from-source/getting-started/#dependencies","title":"Dependencies","text":"<p>You will need a recent Python installation &gt;=3.9 (we aim to support non-eol Python versions).</p> Tip - Managing Python versions <p>Make sure your 'python' is what you expect:</p>  Linux macOS Windows <p>Note that on multi-python systems, this may have a version suffix, and on many Linuxes where python2 and python3 can co-exist, you may also want to use <code>python3</code>.</p> <pre><code>which python\npython --version\n</code></pre> <p>Note that on multi-python systems, this may have a version suffix, and on macOS where python2 and python3 can co-exist, you may also want to use <code>python3</code>.</p> <pre><code>which python\npython --version\n</code></pre> <p>The Python launcher for Windows (<code>py</code>) can help manage versions.</p> <pre><code>which python\npython --version\npy --list-paths\n</code></pre> Tip - Virtual environments <p>We recommend using virtual environments to manage python packages, such as through <code>venv</code> (about, tutorial):</p>  Linux macOS Windows <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\n.venv\\Scripts\\activate.bat\n</code></pre> <p>When done, run <code>deactivate</code>.</p> <pre><code># Upgrade PIP before installing other requirements\npython -m pip install --upgrade pip\n\n# Install IREE build requirements\npython -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre>"},{"location":"building-from-source/getting-started/#building-with-cmake","title":"Building with CMake","text":"<p>To build the Python bindings, configure CMake with the <code>IREE_BUILD_PYTHON_BINDINGS</code> option. 
We also recommend explicitly setting which Python executable to use with <code>Python3_EXECUTABLE</code>:</p> <pre><code># Configure (including other options as discussed above)\ncmake -G Ninja -B ../iree-build/ \\\n  -DIREE_BUILD_PYTHON_BINDINGS=ON  \\\n  -DPython3_EXECUTABLE=\"$(which python)\" \\\n  .\n\n# Build\ncmake --build ../iree-build/\n</code></pre>"},{"location":"building-from-source/getting-started/#using-the-python-bindings","title":"Using the Python bindings","text":"<p>Extend your <code>PYTHONPATH</code> with IREE's <code>bindings/python</code> paths and try importing:</p>  Linux macOS Windows <pre><code>source ../iree-build/.env &amp;&amp; export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n#   iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n</code></pre> <pre><code>source ../iree-build/.env &amp;&amp; export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n#   iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n</code></pre> <pre><code>..\\iree-build\\.env.ps1  # or ..\\iree-build\\.env.bat\n# The 'PYTHONPATH' environment variable should now contain\n#   iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n</code></pre> <p>Using IREE's ML framework importers requires a few extra steps:</p> <pre><code># Install test requirements\npython -m pip install -r integrations/tensorflow/test/requirements.txt\n\n# Install pure Python packages (no build required)\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n\n# 
Then test the tools:\niree-import-tf --help\niree-import-tflite --help\n</code></pre>"},{"location":"building-from-source/ios/","title":"iOS cross-compilation","text":"<p>Cross-compilation for iOS consists of the two steps below.</p> <ul> <li>On the macOS host, build the IREE compiler.  We can run it to create   IREE modules.</li> <li>Build the IREE runtime on the macOS host for iOS devices and the   simulator.  We can then run the IREE module on the simulator.</li> </ul>","tags":["iOS"]},{"location":"building-from-source/ios/#prerequisites","title":"Prerequisites","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#install-xcode-and-ios-sdk","title":"Install Xcode and iOS SDK","text":"<p>For cross-compilation, you need Xcode. It comes with the SDKs for iOS devices and the simulator, as well as the <code>simctl</code> tool for controlling the simulator from the command line.</p>","tags":["iOS"]},{"location":"building-from-source/ios/#host-environment-setup","title":"Host environment setup","text":"<p>On your host platform, you should already be able to build IREE from source.  Please make sure you've gone through the steps in getting started.</p>","tags":["iOS"]},{"location":"building-from-source/ios/#configure-and-build","title":"Configure and build","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#build-the-iree-compiler-for-the-host","title":"Build the IREE compiler for the Host","text":"<p>Build and install on your macOS host:</p> <pre><code>cmake -S . -B ../iree-build/ -GNinja \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  -DCMAKE_INSTALL_PREFIX=../iree-build/install\n\ncmake --build ../iree-build/ --target install\n</code></pre>","tags":["iOS"]},{"location":"building-from-source/ios/#cross-compile-the-iree-runtime-for-ios","title":"Cross-compile the IREE runtime for iOS","text":"<p>Build the runtime for the iOS Simulator.</p> <pre><code>cmake -S . 
-B ../build-ios-sim -GNinja \\\n  -DCMAKE_SYSTEM_NAME=iOS \\\n  -DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphonesimulator Path) \\\n  -DCMAKE_OSX_ARCHITECTURES=arm64 \\\n  -DCMAKE_SYSTEM_PROCESSOR=arm64 \\\n  -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 \\\n  -DCMAKE_IOS_INSTALL_COMBINED=YES \\\n  -DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n  -DCMAKE_INSTALL_PREFIX=../build-ios-sim/install \\\n  -DIREE_BUILD_COMPILER=OFF\n\ncmake --build ../build-ios-sim --config Release --target install\n</code></pre> <p>Or, we can build the runtime for iOS devices it by changing the value of the <code>-DCMAKE OSX SYSROOT</code> option to:</p> <pre><code>  -DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphoneos Path)\n</code></pre>","tags":["iOS"]},{"location":"building-from-source/ios/#running-iree-modules-on-the-ios-simulator","title":"Running IREE modules on the iOS Simulator","text":"<p>Run the IREE compiler on the host to generate a module.</p> <pre><code>../iree-build/install/bin/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre> <p>We could test the generated module by running the macOS version of <code>iree-run-module</code> on the host.</p> <pre><code>../iree-build/install/bin/iree-run-module \\\n  --module=/tmp/simple_abs_vmvx.vmfb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=\"f32=-5\"\n</code></pre> <p>To run it on the iOS simulator, we need to copy the vmfb file into the <code>iree-run-module</code> iOS app bundle.</p> <pre><code>cp /tmp/simple_abs_vmvx.vmfb \\\n   ../build-ios-sim/install/bin/iree-run-module.app/\n</code></pre> <p>Open the iOS Simulator Manager on the host.</p> <pre><code>open -a Simulator\n</code></pre> <p>After creating and booting a simulator in this app, you can list it from the command-line.</p> <pre><code>xcrun simctl list devices | grep Booted\n</code></pre> <p>This is what should come out of the command:</p> <pre><code>    iPhone 
14 Pro (12341234-ABCD-ABCD-ABCD-123412341234) (Booted)\n</code></pre> <p>where <code>iPhone 14 Pro</code> is the device being simulated and <code>12341234-ABCD-ABCD-ABCD-123412341234</code> is the simulator's unique device ID (UDID).</p> <p>Install the app <code>iree-run-module</code> on the simulator, given its UDID.</p> <pre><code>xcrun simctl install &lt;UDID&gt; ../build-ios-sim/install/bin/iree-run-module.app\n</code></pre> <p>Check the path to the installed bundle, where the <code>simple_abs_vmvx.vmfb</code> module should be found.</p> <pre><code>ls $(xcrun simctl get_app_container &lt;UDID&gt; dev.iree.iree-run-module)\n</code></pre> <p>The string <code>dev.iree.iree-run-module</code> is the bundle identifier of the iOS app.  The CMake building process generates it and saves it in the property list (plist) file <code>../build-ios-sim/install/bin/iree-run-module.app/Info.plist</code>.</p> <p>Launch the <code>iree-run-module</code> app on the simulator to run the IREE module <code>simple_abs_vmvx.vmfb</code>.</p> <pre><code>xcrun simctl launch --console \\\n  &lt;UDID&gt; \\\n  dev.iree.runmodule \\\n  --device=local-task \\\n  --function=abs \\\n  --input=\"f32=-5\" \\\n  --module=$(xcrun simctl get_app_container &lt;UDID&gt; dev.iree.iree-run-module)/simple_abs_vmvx.vmfb\n</code></pre>","tags":["iOS"]},{"location":"building-from-source/riscv/","title":"RISC-V cross-compilation","text":"<p>Running on a platform like RISC-V involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific RISC-V CPU architecture and operating system):</p> <ul> <li>IREE's compiler is built on the host and is used there to generate modules   for the target</li> <li>IREE's runtime is built on the host for the target. 
The runtime is then   pushed to the target to run natively.</li> </ul>","tags":["CPU"]},{"location":"building-from-source/riscv/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-environment-setup","title":"Host environment setup","text":"<p>You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#install-risc-v-cross-compile-toolchain-and-emulator","title":"Install RISC-V cross-compile toolchain and emulator","text":"<p>You'll need a RISC-V LLVM compilation toolchain and a RISC-V enabled QEMU emulator.</p> <p>See instructions in the following links</p> <ul> <li>Clang getting started</li> <li>RISC-V GNU toolchain</li> <li>QEMU</li> <li>RISC-V Linux QEMU</li> </ul> <p>Note</p> <p>The <code>RISCV_TOOLCHAIN_ROOT</code> environment variable needs to be set to the root directory of the installed GNU toolchain when building the RISC-V compiler target and the runtime library.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#install-prebuilt-risc-v-tools-risc-v-64-bit-linux-toolchain","title":"Install prebuilt RISC-V tools (RISC-V 64-bit Linux toolchain)","text":"<p>Execute the following script to download the prebuilt RISC-V toolchain and QEMU from the IREE root directory:</p> <pre><code>./build_tools/riscv/riscv_bootstrap.sh\n</code></pre> <p>Note</p> <p>The prebuilt toolchain is built with AlmaLinux release 8.8 docker It requires glibc &gt;= 2.28 for your host machine.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#support-vector-extension","title":"Support vector extension","text":"<p>For RISC-V vector extensions support, see additional instructions</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#configure-and-build","title":"Configure and 
build","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-configuration","title":"Host configuration","text":"<p>Build and install on your host machine:</p> <pre><code>cmake -GNinja -B ../iree-build/ \\\n  -DCMAKE_C_COMPILER=clang \\\n  -DCMAKE_CXX_COMPILER=clang++ \\\n  -DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  .\ncmake --build ../iree-build/ --target install\n</code></pre>","tags":["CPU"]},{"location":"building-from-source/riscv/#target-configuration","title":"Target configuration","text":"<p>The following instruction shows how to build for a RISC-V 64-bit Linux machine. For other RISC-V targets, please refer to riscv.toolchain.cmake as a reference of how to set up the cmake configuration.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#risc-v-64-bit-linux-target","title":"RISC-V 64-bit Linux target","text":"<pre><code>cmake -GNinja -B ../iree-build-riscv/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"./build_tools/cmake/riscv.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=$(realpath ../iree-build/install/bin) \\\n  -DRISCV_CPU=linux-riscv_64 \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  -DRISCV_TOOLCHAIN_ROOT=${RISCV_TOOLCHAIN_ROOT} \\\n  -DIREE_ENABLE_CPUINFO=OFF \\\n  .\ncmake --build ../iree-build-riscv/\n</code></pre>","tags":["CPU"]},{"location":"building-from-source/riscv/#running-iree-bytecode-modules-on-the-risc-v-system","title":"Running IREE bytecode modules on the RISC-V system","text":"<p>Note</p> <p>The following instructions are meant for the RISC-V 64-bit Linux target. For the bare-metal target, please refer to simple_embedding to see how to build a ML workload for a bare-metal machine.</p> <p>Set the path to qemu-riscv64 Linux emulator binary in the <code>QEMU_BIN</code> environment variable. 
If it is installed with <code>riscv_bootstrap.sh</code>, the path is default at ${HOME}/riscv/qemu/linux/RISCV/bin/qemu-riscv64.</p> <pre><code>export QEMU_BIN=&lt;path to qemu-riscv64 binary&gt;\n</code></pre> <p>Invoke the host compiler tools to produce a bytecode module FlatBuffer:</p> <pre><code>../iree-build/install/bin/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre> <p>Run the RISC-V emulation:</p> <pre><code>${QEMU_BIN} \\\n  -cpu rv64 \\\n  -L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n  ../iree-build-riscv/tools/iree-run-module \\\n  --device=local-task \\\n  --module=/tmp/simple_abs_vmvx.vmfb \\\n  --function=abs \\\n  --input=f32=-5\n</code></pre>","tags":["CPU"]},{"location":"building-from-source/riscv/#optional-configuration","title":"Optional configuration","text":"<p>RISC-V Vector extensions allows SIMD  code to run more efficiently. To enable the vector extension for the compiler  toolchain and the emulator, build the tools from the following sources:</p> <ul> <li>RISC-V toolchain is built from https://github.com/llvm/llvm-project.<ul> <li>Currently, the LLVM compiler is built on GNU toolchain, including libgcc,   GNU linker, and C libraries. You need to build GNU toolchain first.</li> <li>Clone GNU toolchain from:   https://github.com/riscv/riscv-gnu-toolchain.   
Switch the \"riscv-binutils\" submodule to   <code>git://sourceware.org/git/binutils-gdb.git</code> manually.</li> </ul> </li> <li>RISC-V QEMU is built from https://gitlab.com/qemu-project/qemu/tree/v8.1.2.</li> </ul> <p>The SIMD code can be generated following the IREE CPU flow with the additional command-line flags</p> <pre><code>tools/iree-compile \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-llvmcpu-target-triple=riscv64 \\\n  --iree-llvmcpu-target-abi=lp64d \\\n  --iree-llvmcpu-target-cpu-features=\"+m,+a,+f,+d,+zvl512b,+v\" \\\n  --riscv-v-fixed-length-vector-lmul-max=8 \\\n  iree_input.mlir -o mobilenet_cpu.vmfb\n</code></pre> <p>Then run on the RISC-V QEMU:</p> <pre><code>${QEMU_BIN} \\\n  -cpu rv64,Zve64d=true,vlen=512,elen=64,vext_spec=v1.0 \\\n  -L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n  ../iree-build-riscv/tools/iree-run-module \\\n  --device=local-task \\\n  --module=mobilenet_cpu.vmfb \\\n  --function=predict \\\n  --input=\"1x224x224x3xf32=0\"\n</code></pre>","tags":["CPU"]},{"location":"community/","title":"Community projects","text":"<p>Projects built by community members:</p> <ul> <li> <p>The SHARK and   SRT projects offer highly tuned performance   and user interfaces for running a large corpus of machine learning programs.</p> </li> <li> <p>The SHARK-Turbine project provides   tools for bridging between PyTorch and IREE.</p> </li> <li> <p>The IREE Bare-Metal Arm Sample   shows how to build IREE with the   Arm GNU Toolchain   for bare-metal Arm targets using the open-source firmware libraries   CMSIS and   libopencm3.</p> </li> <li> <p>The IREE C++ Template   shows one way to integrate IREE's runtime into a project with CMake.</p> </li> </ul> <p>Official repositories:</p> <ul> <li> <p>iree-jax is home to   IREE's AOT support for JAX programs.</p> </li> <li> <p>iree-experimental   includes various samples and prototypes built with IREE.</p> </li> <li> <p>iree-llvm-sandbox   contains experimental work by the IREE team closely related to 
LLVM and   MLIR, usually with the aim of contributing back to those upstream projects.</p> </li> </ul>"},{"location":"community/blog/","title":"Blog","text":"<p>Updates from the IREE team</p>"},{"location":"community/blog/2021-10-15-cuda-backend/","title":"CUDA backend","text":"<p>IREE is being designed with re-targetability as a core goal: it should be possible to use IREE to target a broad spectrum of power regimes, from embedded systems to distributed clusters; and it should be possible to extend IREE to target new back-ends without having to reinvent the wheel each time.</p> <p>To explore this, we recently branched out from our initial focus on low-latency mobile deployments with a goal of using IREE to target data center workloads on Nvidia CUDA. This post describes how we quickly brought up a CUDA back-end for IREE and used it to train BERT, then shares some metrics and next steps.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#bring-up","title":"Bring up","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#hal-support","title":"HAL support","text":"<p>IREE has a HAL API that abstract all the targets behind a common interface. The first step to supporting a CUDA target was to map the HAL API onto CUDA. We use the CUDA driver API to reduce dependencies and be closer to the hardware. The HAL API is based on other GPU APIs like Vulkan and Metal, so it was a natural fit for CUDA. The HAL API exposes memory allocations, basic fill and memset commands, kernel dispatch, and general command buffer handling. The original implementation uses the CUDA graph API as a graph maps naturally to command buffers. There is also an implementation using CUDA streams for comparison.</p> <p>HAL exposes an API that can be tested independently, even if we are not able to create CUDA kernels yet we can test a large portion of the CUDA driver using CTS tests. 
Those can be run to make sure a system has the required CUDA support.</p> <p></p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#compiler-support","title":"Compiler support","text":"<p>CUDA has an open source backend in LLVM generating PTX that we are leveraging. Therefore IREE can create NVVM (CUDA LLVM variant) and use LLVM's backend to generate PTX. The CUDA driver will do the \"last mile compilation\" at runtime to convert PTX into the GPU's native ISA.</p> <p>IREE compiler pipeline starts from linalg with tensor operands. A large part of the compiler is independent of the target.</p> <p>The linalg on tensor representation of the graph is broken up into dispatch regions that are processed by NVVM Codegen. A simple implementation of the compiler is to run bufferization and convert linalg to standard followed by conversion to NVVM/LLVM. Most of those transformation can re-use upstream MLIR transformations and share it with any other backend targeting LLVM IR. Leveraging MLIR conversion to LLVM will allow us to quickly go from a simple \"hello world\" to supporting full models.</p> <p>IREE code generation is based on MLIR infrastructure so each step can easily be tested independently using the MLIR lit framework.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#flatbuffer-definition","title":"FlatBuffer definition","text":"<p>Kernels are encoded in a FlatBuffer containing the PTX code as well as the workgroup size to use for the dispatch. 
This allows serialization of the kernels in the IR; they are then de-serialized by the HAL layer.
6\n</code></pre>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#performance","title":"Performance","text":"<p>Now that we have enabled functionality we need to look at the performance. Once again we can leverage existing MLIR transformations to speed up the developement work.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#tiling-and-distribution","title":"Tiling and distribution","text":"<p>The first obvious step to get efficient code on CUDA is to make sure we distribute the work on enough blocks and threads to fill up the GPU. At the time of bring up not all ops were being tiled and distributed in the common IREE layer. During dispatch region creation we apply tile and fuse which will distribute the work into a set of workgroups that are mapped to CUDA blocks.</p> <p>At the beginning of the code generation we look at the dispatch region and decide on the tile size for a workgroup. For CUDA we also decide the number of threads per block. 
We will then have a pass tiling the ops in the dispatch region a second time to distribute the work onto threads within the block.</p> <p>At this stage the IR looks like the following:</p> <pre><code>    %8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -&gt; index\n    %9 = affine.apply affine_map&lt;()[s0] -&gt; (s0 * 4)&gt;()[%8]\n    %10 = memref.subview %in0[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %11 = memref.subview %in1[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %12 = memref.subview %out[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    linalg.generic {\n        indexing_maps = [affine_map&lt;(d0) -&gt; (d0)&gt;,\n                         affine_map&lt;(d0) -&gt; (d0)&gt;,\n                         affine_map&lt;(d0) -&gt; (d0)&gt;],\n        iterator_types = [\"parallel\"]}\n      ins(%10, %11 :\n          memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;,\n          memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;)\n      outs(%12 : memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;) {\n    ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):  // no predecessors\n      %13 = addf %arg1, %arg2 : f32\n      linalg.yield %13 : f32\n    }\n</code></pre>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#vectorization","title":"Vectorization","text":"<p>Even though GPUs execute most operations as scalar, memory operations are optimized to access 128 bits of data per thread. Therefore it is critical to vectorize load/store operations. After tiling to a size we vectorize the IR to get vector read/write mapping to load4/store4. 
This significantly improves the memory access pattern of the code generated.</p> <p>This convert the previous IR to:</p> <pre><code>    %8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -&gt; index\n    %9 = affine.apply affine_map&lt;()[s0] -&gt; (s0 * 4)&gt;()[%8]\n    %10 = memref.subview %in0[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %11 = memref.subview %in1[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %12 = memref.subview %out[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %13 = vector.transfer_read %10[%c0], %cst {in_bounds = [true]} : memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;, vector&lt;4xf32&gt;\n    %14 = vector.transfer_read %11[%c0], %cst {in_bounds = [true]} : memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;, vector&lt;4xf32&gt;\n    %15 = addf %13, %14 : vector&lt;4xf32&gt;\n    vector.transfer_write %15, %12[%c0] {in_bounds = [true]} : vector&lt;4xf32&gt;, memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n</code></pre>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#shared-memory-optimization","title":"Shared memory optimization","text":"<p>Nvidia GPUs have a fast shared memory that needs to be leveraged to optimize cases where we may be memory bound and have the potential to re-use memory reads.</p> <p>For operations like GEMM using shared memory gives us a significant speed up. 
We leverage memory promotion, vector distribution and software pipelining transformations from MLIR to generate efficient copies from global to shared memory that can be interleaved with the compute work.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#optimization-pipeline","title":"Optimization pipeline","text":"<p>Those different transformations compose to this flow:</p> <p></p> <p>The full dump step by step of a linalg.matmul operation can be found here.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#results-and-next-steps","title":"Results and next steps","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#gemm","title":"GEMM","text":"<p>We compare the performance of a single GEMM operation to highly optimized library cuBLAS using mmperf framework.</p> <p></p> <p>The graph can be re-produced based on instructions on mmperf</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#future-work","title":"Future work","text":"<p>Nod.ai has contributed an experimental HAL module for ROCM that allows us to re-use the compiler parts to support ROCM, more support is going to be added in the future.</p> <p>Several performance improvements are still under progress, including optimizing the runtime allocator to reduce the host-side overhead and tuning tile sizes based profiling.</p> <p>Several models are running and we will publish more detailed benchmark results in the near future.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/","title":"IREE / MLIR / Linalg tutorial","text":"","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#introduction","title":"Introduction","text":"<p>This tutorial is simultaneously about IREE, MLIR, and specifically the MLIR Linalg 
dialect.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#what-is-mlir","title":"What is MLIR?","text":"<p>MLIR is a programming language, but MLIR in itself is almost just an empty shell. What it really provides is a framework allowing to define MLIR dialects which are where the features come from.</p> <p>The \"IR\" part of the MLIR name stands for \"intermediate representation\". It means that MLIR is meant to be primarily for compiler-internal representations of code. But MLIR is actually fairly nice for humans to work with, and it's not hard to hand-author some MLIR programs from scratch. That is exactly the topic of this tutorial.</p> <p>The \"ML\" part of the MLIR name stands for \"multi-level\" (not machine learning!). It means that MLIR allows for multiple dialects to be freely mixed in the same MLIR programs. Each dialect can define operations, types and attributes, and each single MLIR statement can mix ops, types and attributes coming from different dialects.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#what-is-the-linalg-dialect","title":"What is the Linalg dialect?","text":"<p>Linalg is a MLIR dialect that essentially consists of a single op, <code>linalg.generic</code>, with most other ops in this dialect being just convenience aliases for special cases of <code>linalg.generic</code>. 
So, to describe Linalg dialect is essentially to describe <code>linalg.generic</code>.</p> <p>The point of this is that this single op, <code>linalg.generic</code>, is:</p> <ul> <li>General enough to express the entirety of usual machine learning workloads in   any quantization scheme at all.</li> <li>High-level enough to be lowered to efficient code for any target (CPU, GPU,   ...)</li> <li>Designed to be a good fit for compiler IR-to-IR transformations.</li> </ul> <p>These traits make the Linalg dialect an ideal \"middle-end\" IR for a machine learning compiler.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#what-is-iree","title":"What is IREE?","text":"<p>IREE is a MLIR compiler and runtime that can lower MLIR programs through successive, ever lower-level dialects, ultimately producing machine code for various CPU, GPU and other hardware targets. Check out the Developer overview docs and the ML frameworks docs.</p> <p>Front-ends can ingest source programs from various machine-learning frameworks into MLIR Linalg dialect. Boundaries are in flux, but it is a good enough mental model to think of anything up to Linalg as \"front-end\". One example is, for ingesting PyTorch programs, the front-end is torch-mlir and end-users are encouraged to use iree-turbine, which integrates IREE, torch-mlir and PyTorch.</p> <p>This tutorial is only concerned about the Linalg dialect, and we are going to learn to hand-author some Linalg programs. 
The point of the above tangent about front-ends is to make it clear that no matter which way you feed a program into IREE, it will internally be rewritten into a Linalg program, because that really is the intermediate representation in this compiler.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#getting-iree-binaries","title":"Getting IREE binaries","text":"<p>IREE builds can be downloaded or installed as Python packages or built from sources.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#first-linalg-programs","title":"First linalg programs","text":"<p>Before we start: there is also an official Linalg tutorial. It takes a different approach compared to the present tutorial, so the two are complementary.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#static-shape-element-wise-addition-of-two-1d-arrays","title":"Static-shape, element-wise addition of two 1D arrays","text":"<p>Here is our first Linalg function. The scalar type used in this program, <code>f32</code>, is 32-bit floating-point.</p> <p>Notice some elements of MLIR syntax:</p> <ul> <li>The <code>%</code> prefix on an identifier indicates a   SSA value, like   here <code>%result</code>.</li> <li>The <code>@</code> prefix on an identifier indicates a function, like here <code>@foo</code>.</li> <li>The <code>^</code> prefix on an identifier indicates a   block, like here <code>^bb0</code>.</li> <li>The <code>#</code> prefix on an identifier indicates an   attribute alias,   like here <code>#map_1d_identity</code>.</li> <li>The <code>x</code> letter is used as delimiter in shapes, and between the shape and the   element type, like here <code>10xf32</code> meaning a 1D shape of size 10 with element   type <code>f32</code>.</li> <li>Operations have the form <code>dialect.name</code>. 
For example, <code>tensor.empty</code> is the   <code>empty</code> operation within the <code>tensor</code> dialect, and <code>func.func</code> is the <code>func</code>   operation within the <code>func</code> dialect.</li> </ul> <pre><code>// The 1D identity map, used below.\n#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n\n// Define a function @foo taking two tensor arguments `%lhs` and `%rhs` and returning a tensor.\nfunc.func @foo(\n      %lhs : tensor&lt;10xf32&gt;,\n      %rhs : tensor&lt;10xf32&gt;\n    ) -&gt; tensor&lt;10xf32&gt; {\n  // A constant used below.\n  %c0f32 = arith.constant 0.0 : f32\n  // Create a result \"init value\". Think of it as an abstract \"allocation\",\n  // creating a tensor but not giving its elements any particular value. It would be\n  // undefined behavior to read any element from this tensor.\n  %result_empty =  tensor.empty() : tensor&lt;10xf32&gt;\n\n  // Perform the computation. The following is all a single linalg.generic op.\n\n  %result = linalg.generic {\n    // This {...} section is the \"attributes\" - some compile-time settings for this op.\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_identity\n    ],\n    // There is one tensor dimension, and it's a parallel-iteration dimension,\n    // meaning that it occurs also as a result tensor dimension. The alternative\n    // would be \"reduction\", for dimensions that do not occur in the result tensor.\n    iterator_types=[\"parallel\"]\n  } // End of the attributes for this linalg.generic. Next come the parameters:\n    // `ins` is where we pass regular input-parameters\n    ins(%lhs, %rhs : tensor&lt;10xf32&gt;, tensor&lt;10xf32&gt;)\n    // `outs` is where we pass the \"outputs\", but that term has a subtle meaning\n    // in linalg. 
Here we are passing a tensor.empty, meaning just a placeholder\n    // for the output with no preexisting element values. In other examples with\n    // an accumulator, this is where the accumulator would be passed.\n    outs(%result_empty : tensor&lt;10xf32&gt;)\n    // End of parameters. The next {...} part is the \"code block\".\n  {\n    // bb0 is a code block taking one scalar from each input tensor as argument, and\n    // computing and \"yielding\" (ie returning) the corresponding output tensor element.\n    ^bb0(%lhs_entry : f32, %rhs_entry : f32, %unused_result_entry : f32):\n      %add = arith.addf %lhs_entry, %rhs_entry : f32\n      linalg.yield %add : f32\n  } // End of the basic block. Finally, we describe the return type.\n  -&gt; tensor&lt;10xf32&gt;\n\n  // End of the linalg.generic op.\n\n  // Return the function's return value.\n  return %result : tensor&lt;10xf32&gt;\n}\n</code></pre> <p>Compile it like this:</p> <pre><code>iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n</code></pre> <p>Note</p> <p>These are just minimalist <code>iree-compile</code> flags for running on CPU without trying to maximize performance.</p> <ul> <li>To run on GPU or other non-CPU targets, explore other values for   <code>--iree-hal-target-backends=</code>. You will then need to pass a matching   <code>--device=</code> to <code>iree-run-module</code> below.</li> <li>To cross-compile, explore <code>--iree-llvmcpu-target-triple=</code>.</li> <li>To enable higher CPU performance by enabling CPU features:</li> <li>On x86, explore <code>--iree-llvmcpu-target-cpu=</code> (e.g.     <code>--iree-llvmcpu-target-cpu=znver4</code> to target AMD Zen4).</li> <li>On other architectures, explore <code>--iree-llvmcpu-target-cpu-features=</code>.</li> <li>To optimize for running on the same machine that the compilation ran     on, pass  <code>--iree-llvmcpu-target-cpu=host</code>. 
That works regardless of     CPU architecture.</li> <li>Check out   these docs for   more useful <code>iree-compile</code> flags.</li> </ul> <p>Run it like this:</p> <pre><code>$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=10xf32=[0,1,2,3,4,5,6,7,8,9] \\\n  --input=10xf32=[90,80,70,60,50,40,30,20,10,0]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n10xf32=90 81 72 63 54 45 36 27 18 9\n</code></pre> <p>Here, each <code>--input</code> parameter specifies one input. First its shape and element type, <code>10xf32</code>, then the example array elements in <code>[...]</code> brackets. The output of <code>iree-run-module</code> above shows the contents of the result.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#dynamic-shape-element-wise-addition-of-two-1d-arrays","title":"Dynamic-shape, element-wise addition of two 1D arrays","text":"<p>While we are going to mostly focus on static shapes for simplicity in the rest of this tutorial, let us give one dynamic-shape example to at least show that that's not a problem. 
Here is the dynamic-shape equivalent of the previous example.</p> <pre><code>#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n\nfunc.func @foo(\n      %lhs : tensor&lt;?xf32&gt;,\n      %rhs : tensor&lt;?xf32&gt;\n    ) -&gt; tensor&lt;?xf32&gt; {\n  %c0f32 = arith.constant 0.0 : f32\n  %c0 = arith.constant 0 : index\n  %size = tensor.dim %lhs, %c0 : tensor&lt;?xf32&gt;\n  %result_empty =  tensor.empty(%size) : tensor&lt;?xf32&gt;\n\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_identity\n    ],\n    iterator_types=[\"parallel\"]\n  } ins(%lhs, %rhs : tensor&lt;?xf32&gt;, tensor&lt;?xf32&gt;)\n    outs(%result_empty : tensor&lt;?xf32&gt;)\n  {\n    ^bb0(%lhs_entry : f32, %rhs_entry : f32, %unused_result_entry : f32):\n      %add = arith.addf %lhs_entry, %rhs_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;?xf32&gt;\n\n  return %result : tensor&lt;?xf32&gt;\n}\n</code></pre> <p>This program can be compiled and run exactly like the previous one, except that now the <code>iree-run-module</code> command may specify inputs of arbitrary length. 
The only requirement is that both inputs have the same length, otherwise the <code>linalg.generic</code> will have undefined behavior.</p> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=10xf32=[0,1,2,3,4,5,6,7,8,9] \\\n  --input=10xf32=[90,80,70,60,50,40,30,20,10,0]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n10xf32=90 81 72 63 54 45 36 27 18 9\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#passing-one-of-the-inputs-in-outs","title":"Passing one of the inputs in <code>outs</code>","text":"<p>Here is a more concise variant achieving the same result in fewer lines of code, and giving us a first taste of what that <code>outs(...)</code> parameters list can do. We didn't want to show it first, because it's less idiomatic. <code>outs</code> will only become really necessary (and idiomatic) when we will look at <code>reduction</code> iterators. In the previous examples, we had only passed a <code>tensor.empty</code> placeholder for <code>outs</code>. 
This new example shows that we can actually pass there any of the inputs that are shaped like the result.</p> <pre><code>#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n\nfunc.func @foo(\n      %lhs : tensor&lt;10xf32&gt;,\n      %rhs : tensor&lt;10xf32&gt;\n    ) -&gt; tensor&lt;10xf32&gt; {\n\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_identity\n    ],\n    iterator_types=[\"parallel\"]\n  } ins(%lhs : tensor&lt;10xf32&gt;)\n    outs(%rhs : tensor&lt;10xf32&gt;)\n  {\n    ^bb0(%lhs_entry : f32, %rhs_entry : f32):\n      %add = arith.addf %lhs_entry, %rhs_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;10xf32&gt;\n\n  return %result : tensor&lt;10xf32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=10xf32=[0,1,2,3,4,5,6,7,8,9] \\\n  --input=10xf32=[90,80,70,60,50,40,30,20,10,0]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n10xf32=90 81 72 63 54 45 36 27 18 9\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#a-first-reduction-example-summing-a-1d-array","title":"A first <code>reduction</code> example: summing a 1D array","text":"<p>This function takes a 1D array of floats and returns their sum. <code>tensor&lt;f32&gt;</code> is a 0-dimensional tensor type. We could as well extract the single <code>f32</code> element and return that, but we wanted to make this example as simple as possible.</p> <p>What's subtle here is how the <code>bb0</code> block in the <code>linalg.generic</code> now actively uses the <code>%result_entry</code> as an operand to <code>arith.addf</code>, yielding the result of this addition on every iteration. 
Implicitly, this stores the result of that addition to the destination, from where it is re-loaded on the next iteration again as <code>%result_entry</code>. So the SSA value <code>%result_entry</code> has a different value on each iteration.</p> <p>Because the values from the <code>outs</code> parameter are now actually used, we can't directly pass there the <code>tensor.empty</code>, whose elements are uninitialized. We have to initialize the result entries as zeroes, which is achieved by the <code>linalg.fill</code>.</p> <pre><code>#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n#map_1d_proj_0d = affine_map&lt;(m) -&gt; ()&gt;\n\nfunc.func @foo(\n      %input : tensor&lt;10xf32&gt;) -&gt; tensor&lt;f32&gt; {\n  %result_empty = tensor.empty() : tensor&lt;f32&gt;\n  %cst_0 = arith.constant 0.0 : f32\n  %result_init = linalg.fill ins(%cst_0 : f32) outs(%result_empty : tensor&lt;f32&gt;) -&gt; tensor&lt;f32&gt;\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_proj_0d\n    ],\n    iterator_types=[\"reduction\"]\n  } ins(%input : tensor&lt;10xf32&gt;)\n    outs(%result_init : tensor&lt;f32&gt;)\n  {\n    ^bb0(%input_entry : f32, %result_entry : f32):\n      %add = arith.addf %input_entry, %result_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;f32&gt;\n\n  return %result : tensor&lt;f32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb --input=10xf32=[0,1,2,3,4,5,6,7,8,9]\n\nEXEC @foo\nresult[0]: hal.buffer_view\nf32=45\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#combining-parallel-and-reduction-iterators-summing-each-row-of-a-2d-array","title":"Combining <code>parallel</code> and <code>reduction</code> iterators: summing 
each row of a 2D array.","text":"<p>This is our first 2D example so for the first time we have to start explaining how the <code>iterator_types</code> are enumerated and we start seeing some more interesting examples of <code>affine_map</code>.</p> <pre><code>#map_2d_identity = affine_map&lt;(m, n) -&gt; (m, n)&gt;\n#map_2d_proj_first = affine_map&lt;(m, n) -&gt; (m)&gt;\n\nfunc.func @foo(\n      %input : tensor&lt;3x5xf32&gt;) -&gt; tensor&lt;3xf32&gt; {\n  %result_empty = tensor.empty() : tensor&lt;3xf32&gt;\n  %cst_0 = arith.constant 0.0 : f32\n  %result_init = linalg.fill ins(%cst_0 : f32) outs(%result_empty : tensor&lt;3xf32&gt;) -&gt; tensor&lt;3xf32&gt;\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_2d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_2d_proj_first\n    ],\n    iterator_types=[\n      // Rule: the i-th iterator_type corresponds to the i-th coordinate in the\n      // source space of the affine_maps defined above, (m, n). 
So:\n      \"parallel\",  // This refers to the `m` coordinate in the affine-maps.\n                   // This is the coordinate that is preserved in the result,\n                   // see the map_2d_proj_first map given above.\n      \"reduction\" // This refers to the `n` coordinate in the affine-maps.\n                  // This is the coordinate that is dropped by the map_2d_proj_first\n                  // given above and thus not present in the 1D result.\n    ]\n  } ins(%input : tensor&lt;3x5xf32&gt;)\n    outs(%result_init : tensor&lt;3xf32&gt;)\n  {\n    ^bb0(%input_entry : f32, %result_entry : f32):\n      %add = arith.addf %input_entry, %result_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;3xf32&gt;\n\n  return %result : tensor&lt;3xf32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=3x5xf32=[[0,1,2,3,4],[5,6,7,8,9],[10,11,12,13,14]]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n3xf32=10 35 60\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#matrix-multiplication-as-a-linalgmatmul-and-as-a-linalggeneric","title":"Matrix multiplication as a <code>linalg.matmul</code> and as a <code>linalg.generic</code>","text":"<p>We are now ready to see how to express matrix multiplication as a <code>linalg.generic</code>. But actually, rather than just writing that by hand, we are going to let Linalg do it for us. Indeed, in addition to <code>linalg.generic</code>, Linalg contains a number of \"named ops\", which are essentially just short-hand notation for special cases of <code>linalg.generic</code>. One of them is <code>linalg.matmul</code>, doing matrix multiplication accumulating into an existing accumulator. Here is a simple function performing a matrix-multiplication-with-accumulation using <code>linalg.matmul</code>. 
Also in this example, we use dynamic shapes (the <code>?</code> in the shapes, see the above section where we encountered that), but we could just as well use static shapes.</p> <pre><code>func.func @foo(%lhs: tensor&lt;?x?xf32&gt;, %rhs: tensor&lt;?x?xf32&gt;, %acc: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt; {\n  %result = linalg.matmul\n    ins(%lhs, %rhs: tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;)\n    outs(%acc: tensor&lt;?x?xf32&gt;)\n  -&gt; tensor&lt;?x?xf32&gt;\n  return %result: tensor&lt;?x?xf32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=2x2xf32=[[1,2][3,4]] \\\n  --input=2x2xf32=[[1,4][3,2]] \\\n  --input=2x2xf32=[[0,0][0,0]]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n2x2xf32=[7 8][15 20]\n</code></pre> <p>Now we encounter another IREE tool: <code>iree-opt</code>. Unlike <code>iree-compile</code> which compiles an MLIR program all the way down to a <code>.vmfb</code> that's ready to run on the target device, <code>iree-opt</code> only applies selected transformations.</p> <p>We run:</p> <pre><code>iree-opt --linalg-generalize-named-ops prog.mlir\n</code></pre> <p>And that prints:</p> <pre><code>#map = affine_map&lt;(d0, d1, d2) -&gt; (d0, d2)&gt;\n#map1 = affine_map&lt;(d0, d1, d2) -&gt; (d2, d1)&gt;\n#map2 = affine_map&lt;(d0, d1, d2) -&gt; (d0, d1)&gt;\nmodule {\n  func.func @foo(%arg0: tensor&lt;?x?xf32&gt;, %arg1: tensor&lt;?x?xf32&gt;, %arg2: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt; {\n    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = [\"parallel\", \"parallel\", \"reduction\"]} ins(%arg0, %arg1 : tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;) outs(%arg2 : tensor&lt;?x?xf32&gt;) {\n    ^bb0(%in: f32, %in_0: f32, %out: f32):\n      %1 = arith.mulf %in, %in_0 : f32\n      %2 = arith.addf %out, %1 : f32\n      linalg.yield %2 : f32\n    } -&gt; tensor&lt;?x?xf32&gt;\n    
return %0 : tensor&lt;?x?xf32&gt;\n  }\n}\n</code></pre> <p>So that's the <code>linalg.generic</code> implementing matrix multiplication equivalently to the above <code>linalg.matmul</code> form. We can  compile and run that like the above program and it will have exactly the same result.</p> <p>Here the 3 listed <code>iterator_types</code>, <code>[\"parallel\", \"parallel\", \"reduction\"]</code>, correspond to the 3 listed coordinates in the <code>affine_map</code>'s, <code>(d0, d1, d2)</code>. So, <code>d0</code> and <code>d1</code> are parallel dimensions and <code>d2</code> is the reduction dimension. That's why the first two <code>affine_map</code>'s results involve <code>d2</code> (they are respectively for the LHS <code>%arg0</code> and RHS <code>%arg1</code>) and the last <code>affine_map</code>'s result only involves the parallel <code>d0</code> and <code>d1</code>, as it refers to the result matrix.</p> <p>Note</p> <p>Some current IREE compiler optimizations are only triggering on named ops like <code>linalg.matmul</code>, not on the equivalent <code>linalg.generic</code> form. Think of that as a non-essential current limitation, and the intent is over time to overcome these, but in the near term do use <code>linalg.matmul</code> when performance matters.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#integer-element-types","title":"Integer element types","text":"<p>MLIR defines integer types for absolutely any bit-width, including non-power-of-two bit-widths, and in three signedness flavors:</p> <ul> <li>Signed integers, indicated by the letters <code>si</code>.</li> <li>Unsigned integers, indicated by the letters <code>ui</code>.</li> <li>Sign-less integers indicated by the letter <code>i</code>. 
\"Sign-less\" means that the   integer type does not convey signedness; the integer value may be used as   either a signed or an unsigned value but that's a property of the operation   using that value as an operand, that's not encoded in the type.</li> </ul> <p>So for instance, <code>si16</code> is the 16-bit signed integer type, <code>ui24</code> is the 24-bit unsigned integer type, and <code>i8</code> is the sign-less 8-bit integer type.</p> <p>Now here is a very important principle of how the MLIR dialects that are relevant to us in IREE operate:</p> <p>Note</p> <p>Only use sign-less types. Always encode signedness in operations, not in types.</p> <p>For example, here is how we perform a matrix multiplication where the LHS is signed 8-bit integers, the RHS is unsigned 8-bit integers, and the accumulator is signed 32-bit integers. Notice how the fact that LHS is signed and the RHS is unsigned is encoded only in the implementation of the <code>linalg.generic</code> basic block, where the LHS and RHS entries are extended, respectively as signed (<code>arith.extsi</code>) and unsigned (<code>arith.extui</code>):</p> <pre><code>#map = affine_map&lt;(d0, d1, d2) -&gt; (d0, d2)&gt;\n#map1 = affine_map&lt;(d0, d1, d2) -&gt; (d2, d1)&gt;\n#map2 = affine_map&lt;(d0, d1, d2) -&gt; (d0, d1)&gt;\nmodule {\n  func.func @foo(%lhs: tensor&lt;?x?xi8&gt;, %rhs: tensor&lt;?x?xi8&gt;, %acc: tensor&lt;?x?xi32&gt;) -&gt; tensor&lt;?x?xi32&gt; {\n    %result = linalg.generic\n      {indexing_maps = [#map, #map1, #map2],\n       iterator_types = [\"parallel\", \"parallel\", \"reduction\"]}\n      ins(%lhs, %rhs : tensor&lt;?x?xi8&gt;, tensor&lt;?x?xi8&gt;)\n      outs(%acc : tensor&lt;?x?xi32&gt;) {\n    ^bb0(%lhs_entry: i8, %rhs_entry: i8, %acc_entry: i32):\n      %lhs_extended = arith.extsi %lhs_entry : i8 to i32\n      %rhs_extended = arith.extui %rhs_entry : i8 to i32\n      %mul = arith.muli %lhs_extended, %rhs_extended : i32\n      %add = arith.addi %acc_entry, %mul : i32\n      
linalg.yield %add : i32\n    } -&gt; tensor&lt;?x?xi32&gt;\n    return %result : tensor&lt;?x?xi32&gt;\n  }\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=2x2xi8=[[-1,-2][-3,-4]] \\\n  --input=2x2xi8=[[1,4][3,2]] \\\n  --input=2x2xi32=[[0,0][0,0]]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n2x2xi32=[-7 -8][-15 -20]\n</code></pre> <p>Note</p> <p>A current runtime limitation, https://github.com/iree-org/iree/issues/16241, prevents passing sub-byte-bit-width integers on the <code>iree-run-module</code> command line.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/","title":"Exploring CPU microkernels on a matmul example","text":"","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#basic-setup-command-lines","title":"Basic setup, command lines","text":"<p>Source file: <code>matmul.mlir</code>:</p> <pre><code>func.func @matmul_dynamic(%lhs: tensor&lt;?x?xf32&gt;, %rhs: tensor&lt;?x?xf32&gt;, %acc: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt; {\n  %result = linalg.matmul ins(%lhs, %rhs: tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;) outs(%acc: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt;\n  return %result: tensor&lt;?x?xf32&gt;\n}\n</code></pre> <p>Basic compilation command line:</p> <pre><code>$ iree-compile matmul.mlir -o /tmp/matmul.vmfb \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-llvmcpu-target-cpu=znver4 \\\n  --iree-llvmcpu-enable-ukernels=all\n</code></pre> <p>This creates an IREE bytecode module:</p> <pre><code>$ ls -l /tmp/matmul.vmfb\n\n-rw-rw-r-- 1 2884 Jan 22 10:37 /tmp/matmul.vmfb\n</code></pre> <p>The above <code>.vmfb</code> is the only thing that's needed to run this matmul on the target device. 
But to understand microkernels, we are now going to generate additional intermediate files.</p> <p>Additional <code>iree-compile</code> flags to save intermediate files (IR, assembly, object code):</p> <pre><code>--iree-hal-dump-executable-intermediates-to=/tmp/matmul --x86-asm-syntax=intel\n</code></pre> <p>This saves LLVM IR in binary serialization (\"bitcode\", filename extension <code>.bc</code>). To read it, we need to \"disassemble\" it using <code>llvm-dis</code> to obtain textual IR (filename extension <code>.ll</code>).</p> <pre><code>llvm-dis /tmp/matmul/*.bc\n</code></pre> <p>Intermediate files:</p> <pre><code>  35196 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.codegen.bc\n 251597 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.codegen.ll\n 181740 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.linked.bc\n1396190 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.linked.ll\n  32096 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.o\n  34504 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.optimized.bc\n 184981 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.optimized.ll\n  82016 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.s\n</code></pre> <p>Another important <code>iree-compile</code> flag: <code>--mlir-print-ir-after-all</code> records the IR after each pass. 
We save that (stderr) output to a file, <code>ir.log</code> by appending to the <code>iree-compile</code> command line:</p> <pre><code>--mlir-print-ir-after-all 2&gt;/tmp/matmul/ir.log\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#overview-of-the-compilation-and-linking-flow","title":"Overview of the compilation and linking flow","text":"<p>This graph shows the transformations from the source <code>matmul.mlir</code> to the final <code>matmul.vmfb</code> with the various intermediates met in the previous section:</p> <pre><code>graph TD;\nmatmulontensors-- CPUMaterializeEncoding --&gt;mmt4dontensors;\nmmt4dontensors-- CPULowerToUKernels --&gt;ukernelontensors;\nukernelontensors-- IREEComprehensiveBufferize --&gt;ukernelonmemref;\nukernelonmemref-- LowerUKernelOpsToCalls --&gt;ukernelcall;\nukernelcall-- ConvertToLLVM --&gt;codegenll;\ncodegenll--&gt;bitcodelinking;\ngenericsource-- clang -emit-llvm --&gt; genericbitcode -- llvm-link --&gt; ukernelbitcode;\narchsource -- clang -emit-llvm --&gt; archbitcode -- llvm-link --&gt; ukernelbitcode;\nukernelbitcode--&gt;ukernelbitcodeembedded;\nukernelbitcodeembedded--&gt;bitcodelinking;\nbitcodelinking--&gt;linkedll;\nlinkedll -- IR optimization --&gt; optimizedll;\noptimizedll -- LLVM x86 backend --&gt; asm -- LLVM assembler --&gt; object -- iree-compile output --&gt; vmfb;\nmatmulontensors[\"linalg.matmul on tensors\"];\nmmt4dontensors[\"linalg.mmt4d on tensors\"];\nukernelontensors[\"ukernel.generic on tensors\"];\nukernelonmemref[\"ukernel.generic on memrefs\"];\nukernelcall[\"call to ukernel entry point\"];\ncodegenll[\"module_matmul_...codegen.ll\"];\nlinkedll[\"module_matmul_...linked.ll\"];\noptimizedll[\"module_matmul_...optimized.ll\"];\ngenericsource[\"generic source code\nmmt4d.c\"]\narchsource[\"architecture-specific source code\nmmt4d_x86_64_avx512_base.c\"]\ngenericbitcode[\"generic code as 
bitcode\nukernel_bitcode_generic_x86_64.bc\"]\narchbitcode[\"architecture-specific code as bitcode\nukernel_bitcode_arch_x86_64_avx512_base.bc\"]\nukernelbitcode[\"linked bitcode\nukernel_bitcode_x86_64.bc\"];\nukernelbitcodeembedded[\"microkernel bitcode embedded as\nstatic data in iree-compile\"];\nbitcodelinking[\"llvm::Linker::LinkInModule\"];\nasm[\"x86 asm, module_matmul_...s\"];\nobject[\"x86 ELF, module_matmul_...o\"];\nvmfb[\"matmul.vmfb\"];\n\nsubgraph Part1[\"Part 1: MLIR code generation\"]\n  matmulontensors\n  mmt4dontensors\n  ukernelontensors\n  ukernelonmemref\n  ukernelcall\n  codegenll\nend\n\nsubgraph Part2[\"Part 2: Microkernels compilation (part of the IREE build)\"]\n  genericsource\n  archsource\n  genericbitcode\n  archbitcode\n  ukernelbitcode\n  ukernelbitcodeembedded\nend\n\nsubgraph Part3[\"Part 3: Linking with microkernels, optimizing, producing object code\"]\n  bitcodelinking\n  linkedll\n  optimizedll\n  asm\n  object\n  vmfb\nend</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#part-1-mlir-code-generation","title":"\ud83d\udfe8 Part 1: MLIR code generation","text":"<p>Some initial boilerplate happens around our <code>linalg.matmul</code> before anything interesting happens to it.:</p> <p>\u27a4 Appendix: IR dump after WrapEntryPointsPass</p> <p>Next, the first interesting thing is the <code>CPUMaterializeEncoding</code> pass, where the <code>linalg.matmul</code> gets rewritten into a <code>linalg.mmt4d</code> which is a matmul with a tiled data layout. This is where we start specializing to the target ISA feature set, AVX-512, favoring a 16x16 tile size for this float32 matmul.</p> <p>\u27a4 Appendix: IR Dump After CPUMaterializeEncoding</p> <p>The idea is that <code>linalg.mmt4d</code> is what we will have a microkernel for, below. 
There is no need to have microkernels for anything but the target-optimal tiled layout, so we don't bother carrying a microkernel for <code>linalg.matmul</code> itself. The matrix layout transformation, bringing matrix data into this tiled layout, is also out of the scope of this <code>linalg.mmt4d</code> and hence of the <code>mmt4d</code> microkernel: we can rely on generic code-generation to take care of these byte-permutations, which is our preference as we aim to let that fuse into producers/consumers.</p> <p>Next comes the rewrite of <code>linalg.mmt4d</code> into a microkernel op, done by the <code>CPULowerToUKernels</code> pass. Here is the TableGen definition of the generic microkernel op we're going to generate:</p> <p>TableGen definition of <code>ukernel.generic</code></p> <p>C++ compiler code for CPULowerToUKernels</p> <p>\u27a4 Appendix: IR Dump After CPULowerToUKernels</p> <p>Notice that this IR is still working on <code>tensor</code> values, not on <code>memref</code> values.</p> <ul> <li>Rewrites are much nicer to perform on tensors than on memrefs.</li> <li><code>ukernel.generic</code> works with both tensors and memrefs.</li> <li>Allows performing the rewrite to <code>ukernel.generic</code> while still on tensors,   then just ride bufferization.</li> </ul> <p>Next, bufferization takes place. 
<code>tensor</code> values become <code>memref</code>.</p> <p>\u27a4 Appendix: IR Dump After IREEComprehensiveBufferize</p> <p>Next, the <code>LowerUKernelOpsToCalls</code> runs, rewriting <code>ukernel.generic</code> ops into function calls.</p> <ul> <li>Made possible by bufferization: there now are buffer pointers and strides to   pass to the target function.</li> </ul> <p>\u27a4 Appendix: IR Dump After LowerUKernelOpsToCalls</p> <p>Finally, this gets lowered to the MLIR LLVM dialect, in preparation for outputting plain LLVM IR.</p> <p>\u27a4 Appendix: IR Dump After ConvertToLLVM</p> <p>The above gets converted to plain LLVM IR and that's our first intermediate file, <code>module_matmul_linked_llvm_cpu_embedded_elf_x86_64.codegen.bc</code>, which <code>llvm-dis</code> helps disassemble into a textual IR file (<code>.ll</code>).</p> <p>\u27a4 Appendix: Intermediate file: <code>...codegen.bc</code>, disassembled to <code>...codegen.ll</code></p> <p>The above IR references an external symbol <code>iree_uk_mmt4d</code> for the microkernel that it calls, so it now needs to be linked against the ukernels bitcode.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#part-2-microkernels-compilation-part-of-the-iree-build","title":"\ud83d\udfe6 Part 2: Microkernels compilation (part of the IREE build)","text":"<p>Microkernels are:</p> <ul> <li>Compiled to self-contained bitcode, once for each target architecture.<ul> <li>That puts requirements on the source languages that they can be defined in.<ul> <li>Can use C via <code>clang -emit-llvm</code> plus extra flags like <code>-ffreestanding</code>.<ul> <li>The source must not <code>#include</code> standard library headers or do   anything OS-specific.</li> </ul> </li> <li>Can use inline assembly but not out-of-line assembly.</li> </ul> </li> </ul> </li> <li>Taking scalar parameters, including buffer pointers and strides.<ul> <li>Array-processing microkernels have a 
memory-to-memory interface.</li> <li>No vector-to-vector microkernels.<ul> <li>Store-to-load-forwarding can still happen post-linking, effectively   achieving the same.</li> <li>Microkernel ops avoid MLIR vector dialect altogether.</li> </ul> </li> </ul> </li> </ul> <p>C source code for the <code>iree_uk_mmt4d</code> microkernel entry point</p> <p>This calls an architecture-specific function to return a function pointer to the optimized inner-loop implementation to use for given data types and SIMD ISA features, and then uses that in a generic outer-loop implementation.</p> <p>So the really interesting part is the implementation of the inner-loop function that we got a function pointer to. For example, here is the one used in our example where the element type is <code>f32</code> and the target has AVX-512.</p> <p>A custom CMake function, <code>iree_bitcode_library</code>, wraps <code>clang</code> to compile these C source files with special flags to obtain freestanding bitcode.</p> <p>Likewise, a custom CMake function, <code>iree_link_bitcode</code>, wraps <code>llvm-link</code> to link bitcode files.</p> <p>These are used during the IREE compiler build (as a dependency of <code>iree-compile</code>) to build microkernels as bitcode for all supported target architectures, generating one bitcode file for each architecture in the build directory:</p> <pre><code>~/iree-build$ ls ./runtime/src/iree/builtins/ukernel/ukernel_bitcode_*.bc | grep -v generic\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_arm_32.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_arm_64.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_riscv_32.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_riscv_64.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_x86_64.bc\n</code></pre> <p>These files are then embedded as static data within <code>iree-compile</code>, so that <code>iree-compile</code> stays self-contained.</p> <p>Here are some samples of ukernel bitcode if 
you are curious what it looks like:</p> <p>\u27a4 Appendix: embedded microkernel bitcode: <code>iree_uk_mmt4d</code> ukernel entry point</p> <p>\u27a4 Appendix: embedded microkernel bitcode: inner-loop tile function</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#part-3-linking-with-microkernels-optimizing-producing-object-code","title":"\ud83d\udfe9 Part 3: Linking with microkernels, optimizing, producing object code","text":"<p>The previous two sections covered respectively the compilation of the MLIR module, and the compilation of microkernels, as two separate bitcode modules. Now we turn to how these two bitcode modules are linked together.</p> <p>After code generation, <code>iree-compile</code> loads microkernel bitcode: https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp#L490</p> <p>It is worth zooming into that <code>loadUKernelBitcode</code> function as, in addition to just loading the bitcode, it does one important thing: it adds the <code>alwaysinline</code> attribute on every function. As we will see just below, always inlining microkernels is key to achieving perfect results with no downsides compared to a pure code-generation approach. https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.cpp#L36-L62</p> <p>And links it into the current module: https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp#L499</p> <p>The linked IR so far is not very interesting, as it is still essentially just the concatenation of the above-discussed codegen and microkernel bitcode (except now with <code>alwaysinline</code> attributes). 
If you are curious, it is dumped as the <code>...linked.bc</code> file.</p> <p>Where it gets interesting is that immediately after that, we run LLVM IR optimization passes, which can be thought of as a form of link-time optimization (LTO): https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp#L527</p> <p>At this point, all the microkernel code gets inlined into the dispatch function, the correct AVX-512 optimized tile function is selected and inlined, and everything else is DCE'd. That's how the user pays no cost for what they don't use --- not only for the microkernel entry points that they don't call, but also for all the unused code paths within each microkernel.</p> <p>\u27a4 Appendix: Intermediate file: <code>...optimized.bc</code>, disassembled to <code>...optimized.ll</code></p> <p>This then goes to the LLVM x86 backend, which produces x86 assembly.</p> <p>\u27a4 Appendix: x86 assembly</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#appendix","title":"Appendix","text":"","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-wrapentrypointspass","title":"IR dump after WrapEntryPointsPass","text":"<pre><code>// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass (iree-abi-wrap-entry-points) //----- //\n[...]\n// -----// IR Dump After Inliner (inline) //----- //\n#executable_target_embedded_elf_x86_64_ = #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-x86_64\", {cpu = \"znver4\", cpu_features = 
\"+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+mwaitx,+pku,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+shstk,+vaes,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,+evex512\", data_layout = \"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\", native_vector_size = 64 : index, target_triple = \"x86_64-unknown-unknown-eabi-elf\", ukernels = \"all\"}&gt;\n#device_target_llvm_cpu = #hal.device.target&lt;\"llvm-cpu\", {executable_targets = [#executable_target_embedded_elf_x86_64_]}&gt;\nmodule attributes {hal.device.targets = [#device_target_llvm_cpu]} {\n  func.func @matmul_dynamic(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -&gt; !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = \"sync func @matmul_dynamic(%input0: tensor&lt;?x?xf32&gt;, %input1: tensor&lt;?x?xf32&gt;, %input2: tensor&lt;?x?xf32&gt;) -&gt; (%output0: tensor&lt;?x?xf32&gt;)\"}} {\n    %0 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[0] : index\n    %1 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[1] : index\n    %2 = hal.tensor.import %arg0 \"input0\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%0, %1}\n    %3 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[0] : index\n    %4 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[1] : index\n    %5 = hal.tensor.import %arg1 \"input1\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%3, %4}\n    %6 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[0] : index\n    %7 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[1] : index\n    %8 = hal.tensor.import %arg2 \"input2\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%6, 
%7}\n    %9 = linalg.matmul ins(%2, %5 : tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;) outs(%8 : tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt;\n    %10 = hal.tensor.export %9 \"output0\" : tensor&lt;?x?xf32&gt;{%6, %7} -&gt; !hal.buffer_view\n    return %10 : !hal.buffer_view\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-cpumaterializeencoding","title":"IR Dump After CPUMaterializeEncoding","text":"<pre><code>// -----// IR Dump After CPUMaterializeEncoding (iree-codegen-cpu-materialize-encoding) //----- //\n[...]\n// -----// IR Dump After Canonicalizer (canonicalize) //----- //\n[...]\n// -----// IR Dump After CSE (cse) //----- //\n#executable_target_embedded_elf_x86_64_ = #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-x86_64\", {cpu = \"znver4\", cpu_features = \"+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+mwaitx,+pku,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+shstk,+vaes,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,+evex512\", data_layout = \"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\", native_vector_size = 64 : index, target_triple = \"x86_64-unknown-unknown-eabi-elf\", ukernels = \"all\"}&gt;\n#map = affine_map&lt;()[s0] -&gt; (s0 ceildiv 16)&gt;\n#device_target_llvm_cpu = #hal.device.target&lt;\"llvm-cpu\", {executable_targets = [#executable_target_embedded_elf_x86_64_]}&gt;\nmodule attributes {hal.device.targets = [#device_target_llvm_cpu]} {\n  func.func @matmul_dynamic(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -&gt; !hal.buffer_view attributes {iree.abi.stub, 
iree.reflection = {iree.abi.declaration = \"sync func @matmul_dynamic(%input0: tensor&lt;?x?xf32&gt;, %input1: tensor&lt;?x?xf32&gt;, %input2: tensor&lt;?x?xf32&gt;) -&gt; (%output0: tensor&lt;?x?xf32&gt;)\"}} {\n    %cst = arith.constant 0.000000e+00 : f32\n    %0 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[0] : index\n    %1 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[1] : index\n    %2 = hal.tensor.import %arg0 \"input0\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%0, %1}\n    %3 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[0] : index\n    %4 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[1] : index\n    %5 = hal.tensor.import %arg1 \"input1\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%3, %4}\n    %6 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[0] : index\n    %7 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[1] : index\n    %8 = hal.tensor.import %arg2 \"input2\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%6, %7}\n    %9 = affine.apply #map()[%0]\n    %10 = tensor.empty(%9, %1) : tensor&lt;?x?x16x1xf32&gt;\n    %pack = tensor.pack %2 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %10 : tensor&lt;?x?xf32&gt; -&gt; tensor&lt;?x?x16x1xf32&gt;\n    %11 = affine.apply #map()[%4]\n    %12 = tensor.empty(%11, %3) : tensor&lt;?x?x16x1xf32&gt;\n    %pack_0 = tensor.pack %5 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %12 : tensor&lt;?x?xf32&gt; -&gt; tensor&lt;?x?x16x1xf32&gt;\n    %13 = affine.apply #map()[%6]\n    %14 = affine.apply #map()[%7]\n    %15 = tensor.empty(%13, %14) : tensor&lt;?x?x16x16xf32&gt;\n    %pack_1 = tensor.pack %8 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 : tensor&lt;?x?xf32&gt; -&gt; tensor&lt;?x?x16x16xf32&gt;\n    %16 = linalg.mmt4d ins(%pack, %pack_0 : tensor&lt;?x?x16x1xf32&gt;, 
tensor&lt;?x?x16x1xf32&gt;) outs(%pack_1 : tensor&lt;?x?x16x16xf32&gt;) -&gt; tensor&lt;?x?x16x16xf32&gt;\n    %17 = tensor.empty(%6, %7) : tensor&lt;?x?xf32&gt;\n    %unpack = tensor.unpack %16 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %17 : tensor&lt;?x?x16x16xf32&gt; -&gt; tensor&lt;?x?xf32&gt;\n    %18 = hal.tensor.export %unpack \"output0\" : tensor&lt;?x?xf32&gt;{%6, %7} -&gt; !hal.buffer_view\n    return %18 : !hal.buffer_view\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-cpulowertoukernels","title":"IR Dump After CPULowerToUKernels","text":"<pre><code>// -----// IR Dump After CPULowerToUKernels (iree-codegen-cpu-lower-to-ukernels) //----- //\nmodule {\n  func.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32() {\n    %c1281_i32 = arith.constant 1281 : i32\n    %c1_i32 = arith.constant 1 : i32\n    %c16_i32 = arith.constant 16 : i32\n    %c1 = arith.constant 1 : index\n    %c0 = arith.constant 0 : index\n    %c32_i64 = arith.constant 32 : i64\n    %0 = hal.interface.constant.load[0] : i32\n    %1 = hal.interface.constant.load[1] : i32\n    %2 = hal.interface.constant.load[2] : i32\n    %3 = hal.interface.constant.load[3] : i32\n    %4 = hal.interface.constant.load[4] : i32\n    %5 = hal.interface.constant.load[5] : i32\n    %6 = hal.interface.constant.load[6] : i32\n    %7 = hal.interface.constant.load[7] : i32\n    %8 = hal.interface.constant.load[8] : i32\n    %9 = hal.interface.constant.load[9] : i32\n    %10 = hal.interface.constant.load[10] : i32\n    %11 = hal.interface.constant.load[11] : i32\n    %12 = hal.interface.constant.load[12] : i32\n    %13 = hal.interface.constant.load[13] : i32\n    %14 = hal.interface.constant.load[14] : i32\n    %15 = hal.interface.constant.load[15] : i32\n    %16 = arith.extui %0 : i32 to i64\n    %17 = arith.extui %1 : i32 to i64\n    %18 = arith.shli %17, %c32_i64 : i64\n    %19 = 
arith.ori %16, %18 : i64\n    %20 = arith.index_castui %19 : i64 to index\n    %21 = arith.extui %2 : i32 to i64\n    %22 = arith.extui %3 : i32 to i64\n    %23 = arith.shli %22, %c32_i64 : i64\n    %24 = arith.ori %21, %23 : i64\n    %25 = arith.index_castui %24 : i64 to index\n    %26 = arith.extui %4 : i32 to i64\n    %27 = arith.extui %5 : i32 to i64\n    %28 = arith.shli %27, %c32_i64 : i64\n    %29 = arith.ori %26, %28 : i64\n    %30 = arith.index_castui %29 : i64 to index\n    %31 = arith.extui %6 : i32 to i64\n    %32 = arith.extui %7 : i32 to i64\n    %33 = arith.shli %32, %c32_i64 : i64\n    %34 = arith.ori %31, %33 : i64\n    %35 = arith.index_castui %34 : i64 to index\n    %36 = arith.extui %8 : i32 to i64\n    %37 = arith.extui %9 : i32 to i64\n    %38 = arith.shli %37, %c32_i64 : i64\n    %39 = arith.ori %36, %38 : i64\n    %40 = arith.index_castui %39 : i64 to index\n    %41 = arith.extui %10 : i32 to i64\n    %42 = arith.extui %11 : i32 to i64\n    %43 = arith.shli %42, %c32_i64 : i64\n    %44 = arith.ori %41, %43 : i64\n    %45 = arith.index_castui %44 : i64 to index\n    %46 = arith.extui %12 : i32 to i64\n    %47 = arith.extui %13 : i32 to i64\n    %48 = arith.shli %47, %c32_i64 : i64\n    %49 = arith.ori %46, %48 : i64\n    %50 = arith.index_castui %49 : i64 to index\n    %51 = arith.extui %14 : i32 to i64\n    %52 = arith.extui %15 : i32 to i64\n    %53 = arith.shli %52, %c32_i64 : i64\n    %54 = arith.ori %51, %53 : i64\n    %55 = arith.index_castui %54 : i64 to index\n    %56 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%30, %35}\n    %57 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%20) flags(ReadOnly) : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%40, %45}\n    %58 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) 
alignment(64) offset(%25) : !flow.dispatch.tensor&lt;readwrite:tensor&lt;?x?x16x16xf32&gt;&gt;{%50, %55}\n    %workgroup_id_x = hal.interface.workgroup.id[0] : index\n    %workgroup_count_x = hal.interface.workgroup.count[0] : index\n    %workgroup_id_y = hal.interface.workgroup.id[1] : index\n    %workgroup_count_y = hal.interface.workgroup.count[1] : index\n    scf.for %arg0 = %workgroup_id_y to %30 step %workgroup_count_y {\n      scf.for %arg1 = %workgroup_id_x to %40 step %workgroup_count_x {\n        %59 = flow.dispatch.tensor.load %56, offsets = [%arg0, 0, 0, 0], sizes = [1, %35, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%30, %35} -&gt; tensor&lt;1x?x16x1xf32&gt;\n        %60 = flow.dispatch.tensor.load %57, offsets = [%arg1, 0, 0, 0], sizes = [1, %35, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%40, %45} -&gt; tensor&lt;1x?x16x1xf32&gt;\n        %61 = flow.dispatch.tensor.load %58, offsets = [%arg0, %arg1, 0, 0], sizes = [1, 1, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor&lt;readwrite:tensor&lt;?x?x16x16xf32&gt;&gt;{%50, %55} -&gt; tensor&lt;1x1x16x16xf32&gt;\n        %dim = tensor.dim %60, %c1 : tensor&lt;1x?x16x1xf32&gt;\n        %62 = iree_codegen.ukernel.generic \"iree_uk_mmt4d\" ins(%59, %60 : tensor&lt;1x?x16x1xf32&gt;, tensor&lt;1x?x16x1xf32&gt;) outs(%61 : tensor&lt;1x1x16x16xf32&gt;) (%c1, %c1, %dim, %c16_i32, %c16_i32, %c1_i32, %c1281_i32 : index, index, index, i32, i32, i32, i32) fn_def_attrs {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"]} strided_outer_dims(1) -&gt; tensor&lt;1x1x16x16xf32&gt;\n        flow.dispatch.tensor.store %62, %58, offsets = [%arg0, %arg1, 0, 0], sizes = [1, 1, 16, 16], strides = [1, 1, 1, 1] : tensor&lt;1x1x16x16xf32&gt; -&gt; !flow.dispatch.tensor&lt;readwrite:tensor&lt;?x?x16x16xf32&gt;&gt;{%50, %55}\n      }\n    }\n    return\n  
}\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-ireecomprehensivebufferize","title":"IR Dump After IREEComprehensiveBufferize","text":"<pre><code>// -----// IR Dump After IREEComprehensiveBufferize (iree-codegen-iree-comprehensive-bufferize) //----- //\n[...]\n// -----// IR Dump After EmptyTensorToAllocTensor (empty-tensor-to-alloc-tensor) //----- //\n[...]\n// -----// IR Dump After ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- //\n[...]\n// -----// IR Dump After Canonicalizer (canonicalize) //----- //\n[...]\n// -----// IR Dump After CSE (cse) //----- //\n[...]\n// -----// IR Dump After CleanupBufferAllocView (iree-codegen-cleanup-buffer-alloc-view) //----- //\nfunc.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32() {\n  %c1281_i32 = arith.constant 1281 : i32\n  %c1_i32 = arith.constant 1 : i32\n  %c16_i32 = arith.constant 16 : i32\n  %c1 = arith.constant 1 : index\n  %c0 = arith.constant 0 : index\n  %c32_i64 = arith.constant 32 : i64\n  %0 = hal.interface.constant.load[0] : i32\n  %1 = hal.interface.constant.load[1] : i32\n  %2 = hal.interface.constant.load[2] : i32\n  %3 = hal.interface.constant.load[3] : i32\n  %4 = hal.interface.constant.load[4] : i32\n  %5 = hal.interface.constant.load[5] : i32\n  %6 = hal.interface.constant.load[6] : i32\n  %7 = hal.interface.constant.load[7] : i32\n  %8 = hal.interface.constant.load[8] : i32\n  %9 = hal.interface.constant.load[9] : i32\n  %10 = hal.interface.constant.load[10] : i32\n  %11 = hal.interface.constant.load[11] : i32\n  %12 = hal.interface.constant.load[12] : i32\n  %13 = hal.interface.constant.load[13] : i32\n  %14 = hal.interface.constant.load[14] : i32\n  %15 = hal.interface.constant.load[15] : i32\n  %16 = arith.extui %0 : i32 to i64\n  %17 = arith.extui %1 : i32 to i64\n  %18 = arith.shli %17, %c32_i64 : i64\n  %19 = arith.ori %16, %18 : i64\n  %20 = arith.index_castui %19 : i64 to 
index\n  %21 = arith.extui %2 : i32 to i64\n  %22 = arith.extui %3 : i32 to i64\n  %23 = arith.shli %22, %c32_i64 : i64\n  %24 = arith.ori %21, %23 : i64\n  %25 = arith.index_castui %24 : i64 to index\n  %26 = arith.extui %4 : i32 to i64\n  %27 = arith.extui %5 : i32 to i64\n  %28 = arith.shli %27, %c32_i64 : i64\n  %29 = arith.ori %26, %28 : i64\n  %30 = arith.index_castui %29 : i64 to index\n  %31 = arith.extui %6 : i32 to i64\n  %32 = arith.extui %7 : i32 to i64\n  %33 = arith.shli %32, %c32_i64 : i64\n  %34 = arith.ori %31, %33 : i64\n  %35 = arith.index_castui %34 : i64 to index\n  %36 = arith.extui %8 : i32 to i64\n  %37 = arith.extui %9 : i32 to i64\n  %38 = arith.shli %37, %c32_i64 : i64\n  %39 = arith.ori %36, %38 : i64\n  %40 = arith.index_castui %39 : i64 to index\n  %41 = arith.extui %10 : i32 to i64\n  %42 = arith.extui %11 : i32 to i64\n  %43 = arith.shli %42, %c32_i64 : i64\n  %44 = arith.ori %41, %43 : i64\n  %45 = arith.index_castui %44 : i64 to index\n  %46 = arith.extui %12 : i32 to i64\n  %47 = arith.extui %13 : i32 to i64\n  %48 = arith.shli %47, %c32_i64 : i64\n  %49 = arith.ori %46, %48 : i64\n  %50 = arith.index_castui %49 : i64 to index\n  %51 = arith.extui %14 : i32 to i64\n  %52 = arith.extui %15 : i32 to i64\n  %53 = arith.shli %52, %c32_i64 : i64\n  %54 = arith.ori %51, %53 : i64\n  %55 = arith.index_castui %54 : i64 to index\n  %56 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref&lt;?x?x16x1xf32, #hal.descriptor_type&lt;storage_buffer&gt;&gt;{%30, %35}\n  memref.assume_alignment %56, 64 : memref&lt;?x?x16x1xf32, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n  %57 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%20) flags(ReadOnly) : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;{%40, %45}\n  memref.assume_alignment %57, 1 : memref&lt;?x?x16x1xf32, 
strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n  %58 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%25) : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;{%50, %55}\n  memref.assume_alignment %58, 1 : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n  %workgroup_id_x = hal.interface.workgroup.id[0] : index\n  %workgroup_count_x = hal.interface.workgroup.count[0] : index\n  %workgroup_id_y = hal.interface.workgroup.id[1] : index\n  %workgroup_count_y = hal.interface.workgroup.count[1] : index\n  scf.for %arg0 = %workgroup_id_y to %30 step %workgroup_count_y {\n    %subview = memref.subview %56[%arg0, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32, #hal.descriptor_type&lt;storage_buffer&gt;&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n    scf.for %arg1 = %workgroup_id_x to %40 step %workgroup_count_x {\n      %subview_0 = memref.subview %57[%arg1, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n      %subview_1 = memref.subview %58[%arg0, %arg1, 0, 0] [1, 1, 16, 16] [1, 1, 1, 1] : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt; to memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n      iree_codegen.ukernel.generic \"iree_uk_mmt4d\" ins(%subview, %subview_0 : memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;, memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 
1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;) outs(%subview_1 : memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;) (%c1, %c1, %35, %c16_i32, %c16_i32, %c1_i32, %c1281_i32 : index, index, index, i32, i32, i32, i32) fn_def_attrs {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"]} strided_outer_dims(1)\n    }\n  }\n  return\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-lowerukernelopstocalls","title":"IR Dump After LowerUKernelOpsToCalls","text":"<pre><code>// -----// IR Dump After LowerUKernelOpsToCalls (iree-codegen-lower-ukernel-ops-to-calls) //----- //\nmodule {\n  func.func private @iree_uk_mmt4d(memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, index, index, index, i32, i32, i32, i32) attributes {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"], llvm.bareptr = true}\n  func.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32() {\n    %c1281_i32 = arith.constant 1281 : i32\n    %c1_i32 = arith.constant 1 : i32\n    %c16_i32 = arith.constant 16 : i32\n    %c1 = arith.constant 1 : index\n    %c0 = arith.constant 0 : index\n    %c32_i64 = arith.constant 32 : i64\n    %0 = hal.interface.constant.load[0] : i32\n    %1 = hal.interface.constant.load[1] : i32\n    %2 = hal.interface.constant.load[2] : i32\n    %3 = hal.interface.constant.load[3] : i32\n    %4 = hal.interface.constant.load[4] : i32\n    %5 = hal.interface.constant.load[5] : i32\n    %6 = hal.interface.constant.load[6] : i32\n    %7 = hal.interface.constant.load[7] : i32\n    %8 = hal.interface.constant.load[8] : i32\n    %9 = hal.interface.constant.load[9] : i32\n    %10 = hal.interface.constant.load[10] : i32\n    %11 = hal.interface.constant.load[11] : i32\n    %12 = 
hal.interface.constant.load[12] : i32\n    %13 = hal.interface.constant.load[13] : i32\n    %14 = hal.interface.constant.load[14] : i32\n    %15 = hal.interface.constant.load[15] : i32\n    %16 = arith.extui %0 : i32 to i64\n    %17 = arith.extui %1 : i32 to i64\n    %18 = arith.shli %17, %c32_i64 : i64\n    %19 = arith.ori %16, %18 : i64\n    %20 = arith.index_castui %19 : i64 to index\n    %21 = arith.extui %2 : i32 to i64\n    %22 = arith.extui %3 : i32 to i64\n    %23 = arith.shli %22, %c32_i64 : i64\n    %24 = arith.ori %21, %23 : i64\n    %25 = arith.index_castui %24 : i64 to index\n    %26 = arith.extui %4 : i32 to i64\n    %27 = arith.extui %5 : i32 to i64\n    %28 = arith.shli %27, %c32_i64 : i64\n    %29 = arith.ori %26, %28 : i64\n    %30 = arith.index_castui %29 : i64 to index\n    %31 = arith.extui %6 : i32 to i64\n    %32 = arith.extui %7 : i32 to i64\n    %33 = arith.shli %32, %c32_i64 : i64\n    %34 = arith.ori %31, %33 : i64\n    %35 = arith.index_castui %34 : i64 to index\n    %36 = arith.extui %8 : i32 to i64\n    %37 = arith.extui %9 : i32 to i64\n    %38 = arith.shli %37, %c32_i64 : i64\n    %39 = arith.ori %36, %38 : i64\n    %40 = arith.index_castui %39 : i64 to index\n    %41 = arith.extui %10 : i32 to i64\n    %42 = arith.extui %11 : i32 to i64\n    %43 = arith.shli %42, %c32_i64 : i64\n    %44 = arith.ori %41, %43 : i64\n    %45 = arith.index_castui %44 : i64 to index\n    %46 = arith.extui %12 : i32 to i64\n    %47 = arith.extui %13 : i32 to i64\n    %48 = arith.shli %47, %c32_i64 : i64\n    %49 = arith.ori %46, %48 : i64\n    %50 = arith.index_castui %49 : i64 to index\n    %51 = arith.extui %14 : i32 to i64\n    %52 = arith.extui %15 : i32 to i64\n    %53 = arith.shli %52, %c32_i64 : i64\n    %54 = arith.ori %51, %53 : i64\n    %55 = arith.index_castui %54 : i64 to index\n    %56 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref&lt;?x?x16x1xf32&gt;{%30, %35}\n    
memref.assume_alignment %56, 64 : memref&lt;?x?x16x1xf32&gt;\n    %57 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%20) flags(ReadOnly) : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;{%40, %45}\n    memref.assume_alignment %57, 1 : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;\n    %58 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%25) : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt;{%50, %55}\n    memref.assume_alignment %58, 1 : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt;\n    %workgroup_id_x = hal.interface.workgroup.id[0] : index\n    %workgroup_count_x = hal.interface.workgroup.count[0] : index\n    %workgroup_id_y = hal.interface.workgroup.id[1] : index\n    %workgroup_count_y = hal.interface.workgroup.count[1] : index\n    scf.for %arg0 = %workgroup_id_y to %30 step %workgroup_count_y {\n      %subview = memref.subview %56[%arg0, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;\n      scf.for %arg1 = %workgroup_id_x to %40 step %workgroup_count_x {\n        %subview_0 = memref.subview %57[%arg1, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;\n        %subview_1 = memref.subview %58[%arg0, %arg1, 0, 0] [1, 1, 16, 16] [1, 1, 1, 1] : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt; to memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt;\n        %base_buffer, %offset, %sizes:4, %strides:4 = memref.extract_strided_metadata %subview : memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt; -&gt; memref&lt;f32&gt;, index, index, index, index, index, index, index, index, index\n        %base_buffer_2, %offset_3, 
%sizes_4:4, %strides_5:4 = memref.extract_strided_metadata %subview_0 : memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt; -&gt; memref&lt;f32&gt;, index, index, index, index, index, index, index, index, index\n        %base_buffer_6, %offset_7, %sizes_8:4, %strides_9:4 = memref.extract_strided_metadata %subview_1 : memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt; -&gt; memref&lt;f32&gt;, index, index, index, index, index, index, index, index, index\n        func.call @iree_uk_mmt4d(%base_buffer, %offset, %strides#0, %base_buffer_2, %offset_3, %strides_5#0, %base_buffer_6, %offset_7, %strides_9#0, %c1, %c1, %35, %c16_i32, %c16_i32, %c1_i32, %c1281_i32) : (memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, index, index, index, i32, i32, i32, i32) -&gt; ()\n      }\n    }\n    return\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-converttollvm","title":"IR Dump After ConvertToLLVM","text":"<pre><code>// -----// IR Dump After ConvertToLLVM (iree-convert-to-llvm) //----- //\nmodule attributes {llvm.data_layout = \"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\", llvm.target_triple = \"x86_64-unknown-unknown-eabi-elf\"} {\n  llvm.func @iree_uk_mmt4d(!llvm.ptr) attributes {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"], llvm.bareptr = true}\n  llvm.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32(%arg0: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias}) -&gt; i32 {\n    %0 = llvm.mlir.constant(4293970975 : i64) : i64\n    %1 = llvm.mlir.constant(8 : i64) : i64\n    %2 = llvm.mlir.constant(0 : i32) : i32\n    %3 = llvm.mlir.constant(256 : index) : i64\n    %4 = llvm.mlir.constant(-1 : index) : 
i64\n    %5 = llvm.mlir.constant(4 : index) : i64\n    %6 = llvm.mlir.constant(16 : index) : i64\n    %7 = llvm.mlir.constant(0 : index) : i64\n    %8 = llvm.mlir.constant(1281 : i32) : i32\n    %9 = llvm.mlir.constant(1 : i32) : i32\n    %10 = llvm.mlir.constant(16 : i32) : i32\n    %11 = llvm.mlir.constant(1 : index) : i64\n    %12 = llvm.mlir.constant(32 : i64) : i64\n    %13 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %14 = llvm.extractvalue %13[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %15 = llvm.load %14 : !llvm.ptr -&gt; i32\n    %16 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %17 = llvm.extractvalue %16[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %18 = llvm.getelementptr %17[1] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %19 = llvm.load %18 : !llvm.ptr -&gt; i32\n    %20 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %21 = llvm.extractvalue %20[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %22 = llvm.getelementptr %21[2] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %23 = llvm.load %22 : !llvm.ptr -&gt; i32\n    %24 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %25 = llvm.extractvalue %24[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %26 = 
llvm.getelementptr %25[3] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %27 = llvm.load %26 : !llvm.ptr -&gt; i32\n    %28 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %29 = llvm.extractvalue %28[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %30 = llvm.getelementptr %29[4] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %31 = llvm.load %30 : !llvm.ptr -&gt; i32\n    %32 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %33 = llvm.extractvalue %32[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %34 = llvm.getelementptr %33[5] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %35 = llvm.load %34 : !llvm.ptr -&gt; i32\n    %36 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %37 = llvm.extractvalue %36[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %38 = llvm.getelementptr %37[6] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %39 = llvm.load %38 : !llvm.ptr -&gt; i32\n    %40 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %41 = llvm.extractvalue %40[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %42 = llvm.getelementptr %41[7] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %43 = llvm.load %42 : !llvm.ptr -&gt; i32\n    %44 = llvm.load %arg1 : !llvm.ptr -&gt; 
!llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %45 = llvm.extractvalue %44[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %46 = llvm.getelementptr %45[8] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %47 = llvm.load %46 : !llvm.ptr -&gt; i32\n    %48 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %49 = llvm.extractvalue %48[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %50 = llvm.getelementptr %49[9] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %51 = llvm.load %50 : !llvm.ptr -&gt; i32\n    %52 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %53 = llvm.extractvalue %52[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %54 = llvm.getelementptr %53[10] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %55 = llvm.load %54 : !llvm.ptr -&gt; i32\n    %56 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %57 = llvm.extractvalue %56[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %58 = llvm.getelementptr %57[11] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %59 = llvm.load %58 : !llvm.ptr -&gt; i32\n    %60 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %61 = llvm.extractvalue %60[9] : 
!llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %62 = llvm.getelementptr %61[14] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %63 = llvm.load %62 : !llvm.ptr -&gt; i32\n    %64 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %65 = llvm.extractvalue %64[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %66 = llvm.getelementptr %65[15] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %67 = llvm.load %66 : !llvm.ptr -&gt; i32\n    %68 = llvm.zext %15 : i32 to i64\n    %69 = llvm.zext %19 : i32 to i64\n    %70 = llvm.shl %69, %12  : i64\n    %71 = llvm.or %68, %70  : i64\n    %72 = llvm.zext %23 : i32 to i64\n    %73 = llvm.zext %27 : i32 to i64\n    %74 = llvm.shl %73, %12  : i64\n    %75 = llvm.or %72, %74  : i64\n    %76 = llvm.zext %31 : i32 to i64\n    %77 = llvm.zext %35 : i32 to i64\n    %78 = llvm.shl %77, %12  : i64\n    %79 = llvm.or %76, %78  : i64\n    %80 = llvm.zext %39 : i32 to i64\n    %81 = llvm.zext %43 : i32 to i64\n    %82 = llvm.shl %81, %12  : i64\n    %83 = llvm.or %80, %82  : i64\n    %84 = llvm.zext %47 : i32 to i64\n    %85 = llvm.zext %51 : i32 to i64\n    %86 = llvm.shl %85, %12  : i64\n    %87 = llvm.or %84, %86  : i64\n    %88 = llvm.zext %55 : i32 to i64\n    %89 = llvm.zext %59 : i32 to i64\n    %90 = llvm.shl %89, %12  : i64\n    %91 = llvm.or %88, %90  : i64\n    %92 = llvm.zext %63 : i32 to i64\n    %93 = llvm.zext %67 : i32 to i64\n    %94 = llvm.shl %93, %12  : i64\n    %95 = llvm.or %92, %94  : i64\n    %96 = llvm.mul %83, %6  : i64\n    %97 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %98 = llvm.extractvalue %97[10] : 
!llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %99 = llvm.load %98 : !llvm.ptr -&gt; !llvm.ptr\n    %100 = llvm.mul %91, %6  : i64\n    %101 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %102 = llvm.extractvalue %101[10] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %103 = llvm.load %102 : !llvm.ptr -&gt; !llvm.ptr\n    %104 = llvm.mul %95, %3  : i64\n    %105 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %106 = llvm.extractvalue %105[10] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %107 = llvm.getelementptr %106[1] : (!llvm.ptr) -&gt; !llvm.ptr, !llvm.ptr\n    %108 = llvm.load %107 : !llvm.ptr -&gt; !llvm.ptr\n    %109 = llvm.load %arg2 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %110 = llvm.extractvalue %109[0] : !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %111 = llvm.zext %110 : i32 to i64\n    %112 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %113 = llvm.extractvalue %112[4] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %114 = llvm.zext %113 : i32 to i64\n    %115 = llvm.load %arg2 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %116 = llvm.extractvalue %115[1] 
: !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %117 = llvm.zext %116 : i32 to i64\n    %118 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %119 = llvm.extractvalue %118[5] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %120 = llvm.zext %119 : i32 to i64\n    llvm.br ^bb1(%117 : i64)\n  ^bb1(%121: i64):  // 2 preds: ^bb0, ^bb4\n    %122 = llvm.icmp \"slt\" %121, %79 : i64\n    llvm.cond_br %122, ^bb2(%111 : i64), ^bb5\n  ^bb2(%123: i64):  // 2 preds: ^bb1, ^bb3\n    %124 = llvm.icmp \"slt\" %123, %87 : i64\n    llvm.cond_br %124, ^bb3, ^bb4\n  ^bb3:  // pred: ^bb2\n    %125 = llvm.mul %83, %6  : i64\n    %126 = llvm.mul %121, %125  : i64\n    %127 = llvm.icmp \"slt\" %71, %7 : i64\n    %128 = llvm.sub %4, %71  : i64\n    %129 = llvm.select %127, %128, %71 : i1, i64\n    %130 = llvm.sdiv %129, %5  : i64\n    %131 = llvm.sub %4, %130  : i64\n    %132 = llvm.select %127, %131, %130 : i1, i64\n    %133 = llvm.mul %91, %6  : i64\n    %134 = llvm.mul %123, %133  : i64\n    %135 = llvm.add %132, %134  : i64\n    %136 = llvm.mul %123, %3  : i64\n    %137 = llvm.icmp \"slt\" %75, %7 : i64\n    %138 = llvm.sub %4, %75  : i64\n    %139 = llvm.select %137, %138, %75 : i1, i64\n    %140 = llvm.sdiv %139, %5  : i64\n    %141 = llvm.sub %4, %140  : i64\n    %142 = llvm.select %137, %141, %140 : i1, i64\n    %143 = llvm.add %136, %142  : i64\n    %144 = llvm.mul %95, %3  : i64\n    %145 = llvm.mul %121, %144  : i64\n    %146 = llvm.add %143, %145  : i64\n    %147 = llvm.getelementptr inbounds %arg0[4] : (!llvm.ptr) -&gt; !llvm.ptr, !llvm.ptr\n    %148 = llvm.alloca %1 x i64 {alignment = 8 : i64} : (i64) -&gt; !llvm.ptr\n    %149 = llvm.load %147 : !llvm.ptr -&gt; i64\n    %150 = llvm.or %149, %0  : i64\n    llvm.store 
%150, %148 : i64, !llvm.ptr\n    %151 = llvm.getelementptr inbounds %147[1] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %152 = llvm.load %151 : !llvm.ptr -&gt; i64\n    %153 = llvm.getelementptr inbounds %148[1] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %152, %153 : i64, !llvm.ptr\n    %154 = llvm.getelementptr inbounds %147[2] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %155 = llvm.load %154 : !llvm.ptr -&gt; i64\n    %156 = llvm.getelementptr inbounds %148[2] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %155, %156 : i64, !llvm.ptr\n    %157 = llvm.getelementptr inbounds %147[3] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %158 = llvm.load %157 : !llvm.ptr -&gt; i64\n    %159 = llvm.getelementptr inbounds %148[3] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %158, %159 : i64, !llvm.ptr\n    %160 = llvm.getelementptr inbounds %147[4] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %161 = llvm.load %160 : !llvm.ptr -&gt; i64\n    %162 = llvm.getelementptr inbounds %148[4] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %161, %162 : i64, !llvm.ptr\n    %163 = llvm.getelementptr inbounds %147[5] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %164 = llvm.load %163 : !llvm.ptr -&gt; i64\n    %165 = llvm.getelementptr inbounds %148[5] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %164, %165 : i64, !llvm.ptr\n    %166 = llvm.getelementptr inbounds %147[6] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %167 = llvm.load %166 : !llvm.ptr -&gt; i64\n    %168 = llvm.getelementptr inbounds %148[6] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %167, %168 : i64, !llvm.ptr\n    %169 = llvm.getelementptr inbounds %147[7] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %170 = llvm.load %169 : !llvm.ptr -&gt; i64\n    %171 = llvm.getelementptr inbounds %148[7] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %170, %171 : i64, !llvm.ptr\n    %172 = llvm.alloca %11 x !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt; : (i64) -&gt; 
!llvm.ptr\n    %173 = llvm.mlir.undef : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %174 = llvm.insertvalue %99, %173[0] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %175 = llvm.insertvalue %126, %174[1] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %176 = llvm.insertvalue %96, %175[2] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %177 = llvm.insertvalue %103, %176[3] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %178 = llvm.insertvalue %135, %177[4] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %179 = llvm.insertvalue %100, %178[5] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %180 = llvm.insertvalue %108, %179[6] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %181 = llvm.insertvalue %146, %180[7] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %182 = llvm.insertvalue %104, %181[8] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %183 = llvm.insertvalue %11, %182[9] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %184 = llvm.insertvalue %11, %183[10] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %185 = llvm.insertvalue %83, %184[11] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %186 = llvm.insertvalue %10, %185[12] : 
!llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %187 = llvm.insertvalue %10, %186[13] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %188 = llvm.insertvalue %9, %187[14] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %189 = llvm.insertvalue %8, %188[15] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %190 = llvm.insertvalue %148, %189[16] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    llvm.store %190, %172 : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;, !llvm.ptr\n    llvm.call @iree_uk_mmt4d(%172) : (!llvm.ptr) -&gt; ()\n    %191 = llvm.add %123, %114  : i64\n    llvm.br ^bb2(%191 : i64)\n  ^bb4:  // pred: ^bb2\n    %192 = llvm.add %121, %120  : i64\n    llvm.br ^bb1(%192 : i64)\n  ^bb5:  // pred: ^bb1\n    llvm.return %2 : i32\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#intermediate-file-codegenbc-disassembled-to-codegenll","title":"Intermediate file: <code>...codegen.bc</code>, disassembled to <code>...codegen.ll</code>","text":"<pre><code>define internal i32 @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !90 {\n  %4 = load %iree_hal_executable_dispatch_state_v0_t.7, ptr %1, align 8, !dbg !91\n  %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 9, !dbg !91\n  %6 = load i32, ptr %5, align 4, !dbg !91\n  %7 = getelementptr i32, ptr %5, i32 1, !dbg !91\n  %8 = load i32, ptr %7, align 4, !dbg !91\n  %9 = getelementptr i32, ptr %5, i32 2, !dbg !91\n  %10 = load i32, ptr %9, align 4, 
!dbg !91\n  %11 = getelementptr i32, ptr %5, i32 3, !dbg !91\n  %12 = load i32, ptr %11, align 4, !dbg !91\n  %13 = getelementptr i32, ptr %5, i32 4, !dbg !91\n  %14 = load i32, ptr %13, align 4, !dbg !91\n  %15 = getelementptr i32, ptr %5, i32 5, !dbg !91\n  %16 = load i32, ptr %15, align 4, !dbg !91\n  %17 = getelementptr i32, ptr %5, i32 6, !dbg !91\n  %18 = load i32, ptr %17, align 4, !dbg !91\n  %19 = getelementptr i32, ptr %5, i32 7, !dbg !91\n  %20 = load i32, ptr %19, align 4, !dbg !91\n  %21 = getelementptr i32, ptr %5, i32 8, !dbg !91\n  %22 = load i32, ptr %21, align 4, !dbg !91\n  %23 = getelementptr i32, ptr %5, i32 9, !dbg !91\n  %24 = load i32, ptr %23, align 4, !dbg !91\n  %25 = getelementptr i32, ptr %5, i32 10, !dbg !91\n  %26 = load i32, ptr %25, align 4, !dbg !91\n  %27 = getelementptr i32, ptr %5, i32 11, !dbg !91\n  %28 = load i32, ptr %27, align 4, !dbg !91\n  %29 = getelementptr i32, ptr %5, i32 14, !dbg !91\n  %30 = load i32, ptr %29, align 4, !dbg !91\n  %31 = getelementptr i32, ptr %5, i32 15, !dbg !91\n  %32 = load i32, ptr %31, align 4, !dbg !91\n  %33 = zext i32 %6 to i64, !dbg !91\n  %34 = zext i32 %8 to i64, !dbg !91\n  %35 = shl i64 %34, 32, !dbg !91\n  %36 = or i64 %33, %35, !dbg !91\n  %37 = zext i32 %10 to i64, !dbg !91\n  %38 = zext i32 %12 to i64, !dbg !91\n  %39 = shl i64 %38, 32, !dbg !91\n  %40 = or i64 %37, %39, !dbg !91\n  %41 = zext i32 %14 to i64, !dbg !91\n  %42 = zext i32 %16 to i64, !dbg !91\n  %43 = shl i64 %42, 32, !dbg !91\n  %44 = or i64 %41, %43, !dbg !91\n  %45 = zext i32 %18 to i64, !dbg !91\n  %46 = zext i32 %20 to i64, !dbg !91\n  %47 = shl i64 %46, 32, !dbg !91\n  %48 = or i64 %45, %47, !dbg !91\n  %49 = zext i32 %22 to i64, !dbg !91\n  %50 = zext i32 %24 to i64, !dbg !91\n  %51 = shl i64 %50, 32, !dbg !91\n  %52 = or i64 %49, %51, !dbg !91\n  %53 = zext i32 %26 to i64, !dbg !91\n  %54 = zext i32 %28 to i64, !dbg !91\n  %55 = shl i64 %54, 32, !dbg !91\n  %56 = or i64 %53, %55, !dbg !91\n  %57 = zext i32 %30 
to i64, !dbg !91\n  %58 = zext i32 %32 to i64, !dbg !91\n  %59 = shl i64 %58, 32, !dbg !91\n  %60 = or i64 %57, %59, !dbg !91\n  %61 = mul i64 %48, 16, !dbg !91\n  %62 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 10, !dbg !91\n  %63 = load ptr, ptr %62, align 8, !dbg !91\n  %64 = mul i64 %56, 16, !dbg !91\n  %65 = mul i64 %60, 256, !dbg !91\n  %66 = getelementptr ptr, ptr %62, i32 1, !dbg !91\n  %67 = load ptr, ptr %66, align 8, !dbg !91\n  %68 = load %iree_hal_executable_workgroup_state_v0_t.8, ptr %2, align 8, !dbg !91\n  %69 = extractvalue %iree_hal_executable_workgroup_state_v0_t.8 %68, 0, !dbg !91\n  %70 = zext i32 %69 to i64, !dbg !91\n  %71 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 4, !dbg !91\n  %72 = zext i32 %71 to i64, !dbg !91\n  %73 = extractvalue %iree_hal_executable_workgroup_state_v0_t.8 %68, 1, !dbg !91\n  %74 = zext i32 %73 to i64, !dbg !91\n  %75 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 5, !dbg !91\n  %76 = zext i32 %75 to i64, !dbg !91\n  br label %77, !dbg !91\n\n77:                                               ; preds = %147, %3\n  %78 = phi i64 [ %148, %147 ], [ %74, %3 ]\n  %79 = icmp slt i64 %78, %44, !dbg !91\n  br i1 %79, label %80, label %149, !dbg !91\n\n80:                                               ; preds = %83, %77\n  %81 = phi i64 [ %146, %83 ], [ %70, %77 ]\n  %82 = icmp slt i64 %81, %52, !dbg !91\n  br i1 %82, label %83, label %147, !dbg !91\n\n83:                                               ; preds = %80\n  %84 = mul i64 %78, %61, !dbg !91\n  %85 = icmp slt i64 %36, 0, !dbg !91\n  %86 = sub i64 -1, %36, !dbg !91\n  %87 = select i1 %85, i64 %86, i64 %36, !dbg !91\n  %88 = sdiv i64 %87, 4, !dbg !91\n  %89 = sub i64 -1, %88, !dbg !91\n  %90 = select i1 %85, i64 %89, i64 %88, !dbg !91\n  %91 = mul i64 %81, %64, !dbg !91\n  %92 = add i64 %90, %91, !dbg !91\n  %93 = mul i64 %81, 256, !dbg !91\n  %94 = icmp slt i64 %40, 0, !dbg !91\n  %95 = sub i64 -1, %40, !dbg !91\n  %96 
= select i1 %94, i64 %95, i64 %40, !dbg !91\n  %97 = sdiv i64 %96, 4, !dbg !91\n  %98 = sub i64 -1, %97, !dbg !91\n  %99 = select i1 %94, i64 %98, i64 %97, !dbg !91\n  %100 = add i64 %93, %99, !dbg !91\n  %101 = mul i64 %78, %65, !dbg !91\n  %102 = add i64 %100, %101, !dbg !91\n  %103 = getelementptr inbounds ptr, ptr %0, i32 4, !dbg !91\n  %104 = alloca i64, i64 8, align 8, !dbg !91\n  %105 = load i64, ptr %103, align 4, !dbg !91\n  %106 = or i64 %105, 4293970975, !dbg !91\n  store i64 %106, ptr %104, align 4, !dbg !91\n  %107 = getelementptr inbounds i64, ptr %103, i32 1, !dbg !91\n  %108 = load i64, ptr %107, align 4, !dbg !91\n  %109 = getelementptr inbounds i64, ptr %104, i32 1, !dbg !91\n  store i64 %108, ptr %109, align 4, !dbg !91\n  %110 = getelementptr inbounds i64, ptr %103, i32 2, !dbg !91\n  %111 = load i64, ptr %110, align 4, !dbg !91\n  %112 = getelementptr inbounds i64, ptr %104, i32 2, !dbg !91\n  store i64 %111, ptr %112, align 4, !dbg !91\n  %113 = getelementptr inbounds i64, ptr %103, i32 3, !dbg !91\n  %114 = load i64, ptr %113, align 4, !dbg !91\n  %115 = getelementptr inbounds i64, ptr %104, i32 3, !dbg !91\n  store i64 %114, ptr %115, align 4, !dbg !91\n  %116 = getelementptr inbounds i64, ptr %103, i32 4, !dbg !91\n  %117 = load i64, ptr %116, align 4, !dbg !91\n  %118 = getelementptr inbounds i64, ptr %104, i32 4, !dbg !91\n  store i64 %117, ptr %118, align 4, !dbg !91\n  %119 = getelementptr inbounds i64, ptr %103, i32 5, !dbg !91\n  %120 = load i64, ptr %119, align 4, !dbg !91\n  %121 = getelementptr inbounds i64, ptr %104, i32 5, !dbg !91\n  store i64 %120, ptr %121, align 4, !dbg !91\n  %122 = getelementptr inbounds i64, ptr %103, i32 6, !dbg !91\n  %123 = load i64, ptr %122, align 4, !dbg !91\n  %124 = getelementptr inbounds i64, ptr %104, i32 6, !dbg !91\n  store i64 %123, ptr %124, align 4, !dbg !91\n  %125 = getelementptr inbounds i64, ptr %103, i32 7, !dbg !91\n  %126 = load i64, ptr %125, align 4, !dbg !91\n  %127 = getelementptr 
inbounds i64, ptr %104, i32 7, !dbg !91\n  store i64 %126, ptr %127, align 4, !dbg !91\n  %128 = alloca { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr }, i64 1, align 8, !dbg !91\n  %129 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } undef, ptr %63, 0, !dbg !91\n  %130 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %129, i64 %84, 1, !dbg !91\n  %131 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %130, i64 %61, 2, !dbg !91\n  %132 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %131, ptr %63, 3, !dbg !91\n  %133 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %132, i64 %92, 4, !dbg !91\n  %134 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %133, i64 %64, 5, !dbg !91\n  %135 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %134, ptr %67, 6, !dbg !91\n  %136 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %135, i64 %102, 7, !dbg !91\n  %137 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %136, i64 %65, 8, !dbg !91\n  %138 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %137, i64 1, 9, !dbg !91\n  %139 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %138, i64 1, 10, !dbg !91\n  %140 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %139, i64 %48, 11, !dbg !91\n  %141 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %140, i32 16, 12, !dbg 
!91\n  %142 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %141, i32 16, 13, !dbg !91\n  %143 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %142, i32 1, 14, !dbg !91\n  %144 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %143, i32 1281, 15, !dbg !91\n  %145 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %144, ptr %104, 16, !dbg !91\n  store { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %145, ptr %128, align 8, !dbg !91\n  call void @iree_uk_mmt4d(ptr %128), !dbg !91\n  %146 = add i64 %81, %72, !dbg !91\n  br label %80, !dbg !91\n\n147:                                              ; preds = %80\n  %148 = add i64 %78, %76, !dbg !91\n  br label %77, !dbg !91\n\n149:                                              ; preds = %77\n  ret i32 0, !dbg !91\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ukernel-bitcode-entry-point","title":"Ukernel bitcode: entry point","text":"<pre><code>; Function Attrs: nounwind\ndefine dso_local noundef i32 @iree_uk_mmt4d(ptr noundef %0) local_unnamed_addr #10 {\n  %2 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 9\n  %3 = load i64, ptr %2, align 8, !tbaa !1001\n  %4 = icmp eq i64 %3, 0\n  br i1 %4, label %133, label %5\n\n5:                                                ; preds = %1\n  %6 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 10\n  %7 = load i64, ptr %6, align 8, !tbaa !1002\n  %8 = icmp eq i64 %7, 0\n  br i1 %8, label %133, label %9\n\n9:                                                ; preds = %5\n  %10 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 11\n  %11 = load i64, ptr %10, align 8, !tbaa !19\n  %12 = 
icmp eq i64 %11, 0\n  br i1 %12, label %13, label %18\n\n13:                                               ; preds = %9\n  %14 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 15\n  %15 = load i32, ptr %14, align 4, !tbaa !9\n  %16 = and i32 %15, 256\n  %17 = icmp eq i32 %16, 0\n  br i1 %17, label %18, label %133\n\n18:                                               ; preds = %13, %9\n  %19 = tail call ptr @iree_uk_mmt4d_select_tile_func(ptr noundef nonnull %0) #14\n  %20 = load i64, ptr %2, align 8, !tbaa !1001\n  %21 = trunc i64 %20 to i32\n  %22 = load i64, ptr %6, align 8, !tbaa !1002\n  %23 = trunc i64 %22 to i32\n  %24 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 15\n  %25 = load i32, ptr %24, align 4, !tbaa !9\n  %26 = zext i32 %25 to i64\n  %27 = shl i64 %26, 56\n  %28 = add i64 %27, -72057594037927936\n  %29 = ashr exact i64 %28, 56\n  %30 = getelementptr inbounds [9 x i32], ptr @switch.table.iree_uk_mmt4d, i64 0, i64 %29\n  %31 = load i32, ptr %30, align 4\n  %32 = lshr i32 %31, 8\n  %33 = and i32 %31, 7\n  %34 = and i32 %32, 7\n  %35 = and i32 %31, 327680\n  %36 = add nsw i32 %35, -196608\n  %37 = lshr exact i32 %36, 16\n  %38 = zext nneg i32 %37 to i64\n  %39 = zext nneg i32 %33 to i64\n  %40 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 3\n  %41 = load ptr, ptr %40, align 8, !tbaa !1003\n  %42 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 4\n  %43 = load i64, ptr %42, align 8, !tbaa !1004\n  %44 = zext nneg i32 %34 to i64\n  %45 = shl i64 %43, %44\n  %46 = sdiv i64 %45, 8\n  %47 = getelementptr inbounds i8, ptr %41, i64 %46\n  %48 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 2\n  %49 = load i64, ptr %48, align 8, !tbaa !1005\n  %50 = shl i64 %49, %39\n  %51 = sdiv i64 %50, 8\n  %52 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 5\n  %53 = load i64, ptr %52, align 8, !tbaa 
!1006\n  %54 = shl i64 %53, %44\n  %55 = sdiv i64 %54, 8\n  %56 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 8\n  %57 = load i64, ptr %56, align 8, !tbaa !1007\n  %58 = shl i64 %57, %38\n  %59 = icmp sgt i32 %21, 0\n  br i1 %59, label %60, label %133\n\n60:                                               ; preds = %18\n  %61 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 13\n  %62 = load i32, ptr %61, align 4, !tbaa !996\n  %63 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 12\n  %64 = load i32, ptr %63, align 8, !tbaa !1000\n  %65 = shl i32 %62, 16\n  %66 = ashr exact i32 %65, 16\n  %67 = shl i32 %64, 16\n  %68 = ashr exact i32 %67, 16\n  %69 = mul nsw i32 %66, %68\n  %70 = shl i32 %69, %37\n  %71 = load ptr, ptr %0, align 8, !tbaa !1008\n  %72 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 1\n  %73 = load i64, ptr %72, align 8, !tbaa !1009\n  %74 = shl i64 %73, %39\n  %75 = sdiv i64 %74, 8\n  %76 = getelementptr inbounds i8, ptr %71, i64 %75\n  %77 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 6\n  %78 = load ptr, ptr %77, align 8, !tbaa !1010\n  %79 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 7\n  %80 = load i64, ptr %79, align 8, !tbaa !1011\n  %81 = shl i64 %80, %38\n  %82 = getelementptr inbounds i8, ptr %78, i64 %81\n  %83 = icmp sgt i32 %23, 0\n  %84 = sext i32 %70 to i64\n  br i1 %83, label %90, label %85\n\n85:                                               ; preds = %60\n  %86 = and i32 %21, 3\n  %87 = icmp ult i32 %21, 4\n  br i1 %87, label %121, label %88\n\n88:                                               ; preds = %85\n  %89 = and i32 %21, 2147483644\n  br label %107\n\n90:                                               ; preds = %60, %102\n  %91 = phi i32 [ %105, %102 ], [ 0, %60 ]\n  %92 = phi ptr [ %103, %102 ], [ %82, %60 ]\n  %93 = phi ptr [ %104, %102 ], [ %76, %60 ]\n  
tail call void @llvm.prefetch.p0(ptr %92, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %93, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  br label %94\n\n94:                                               ; preds = %94, %90\n  %95 = phi i32 [ 0, %90 ], [ %100, %94 ]\n  %96 = phi ptr [ %47, %90 ], [ %99, %94 ]\n  %97 = phi ptr [ %92, %90 ], [ %98, %94 ]\n  tail call void %19(ptr noundef %97, ptr noundef %93, ptr noundef %96, ptr noundef nonnull %0) #14\n  %98 = getelementptr inbounds i8, ptr %97, i64 %84\n  %99 = getelementptr inbounds i8, ptr %96, i64 %55\n  %100 = add nuw nsw i32 %95, 1\n  %101 = icmp eq i32 %100, %23\n  br i1 %101, label %102, label %94, !llvm.loop !1012\n\n102:                                              ; preds = %94\n  %103 = getelementptr inbounds i8, ptr %92, i64 %58\n  %104 = getelementptr inbounds i8, ptr %93, i64 %51\n  %105 = add nuw nsw i32 %91, 1\n  %106 = icmp eq i32 %105, %21\n  br i1 %106, label %133, label %90, !llvm.loop !1013\n\n107:                                              ; preds = %107, %88\n  %108 = phi ptr [ %82, %88 ], [ %117, %107 ]\n  %109 = phi ptr [ %76, %88 ], [ %118, %107 ]\n  %110 = phi i32 [ 0, %88 ], [ %119, %107 ]\n  tail call void @llvm.prefetch.p0(ptr %108, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %109, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %111 = getelementptr inbounds i8, ptr %108, i64 %58\n  %112 = getelementptr inbounds i8, ptr %109, i64 %51\n  tail call void @llvm.prefetch.p0(ptr %111, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %112, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %113 = getelementptr inbounds i8, ptr %111, i64 %58\n  %114 = getelementptr inbounds i8, ptr %112, i64 %51\n  tail call void @llvm.prefetch.p0(ptr %113, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %114, i32 0, i32 3, i32 
1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %115 = getelementptr inbounds i8, ptr %113, i64 %58\n  %116 = getelementptr inbounds i8, ptr %114, i64 %51\n  tail call void @llvm.prefetch.p0(ptr %115, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %116, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %117 = getelementptr inbounds i8, ptr %115, i64 %58\n  %118 = getelementptr inbounds i8, ptr %116, i64 %51\n  %119 = add i32 %110, 4\n  %120 = icmp eq i32 %119, %89\n  br i1 %120, label %121, label %107, !llvm.loop !1013\n\n121:                                              ; preds = %107, %85\n  %122 = phi ptr [ %82, %85 ], [ %117, %107 ]\n  %123 = phi ptr [ %76, %85 ], [ %118, %107 ]\n  %124 = icmp eq i32 %86, 0\n  br i1 %124, label %133, label %125\n\n125:                                              ; preds = %121, %125\n  %126 = phi ptr [ %129, %125 ], [ %122, %121 ]\n  %127 = phi ptr [ %130, %125 ], [ %123, %121 ]\n  %128 = phi i32 [ %131, %125 ], [ 0, %121 ]\n  tail call void @llvm.prefetch.p0(ptr %126, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %127, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %129 = getelementptr inbounds i8, ptr %126, i64 %58\n  %130 = getelementptr inbounds i8, ptr %127, i64 %51\n  %131 = add i32 %128, 1\n  %132 = icmp eq i32 %131, %86\n  br i1 %132, label %133, label %125, !llvm.loop !1014\n\n133:                                              ; preds = %121, %125, %102, %1, %5, %13, %18\n  ret i32 0\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ukernel-bitcode-tile-function","title":"Ukernel bitcode: tile function","text":"<pre><code>; Function Attrs: nofree norecurse nosync nounwind memory(read, argmem: readwrite, inaccessiblemem: readwrite)\ndefine dso_local void @iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base(ptr 
noalias nocapture noundef %0, ptr noalias nocapture noundef readonly %1, ptr noalias nocapture noundef readonly %2, ptr nocapture noundef readonly %3) #4 {\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !367)\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !370)\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !372)\n  tail call void @llvm.prefetch.p0(ptr %1, i32 0, i32 3, i32 1), !noalias !374\n  tail call void @llvm.prefetch.p0(ptr %2, i32 0, i32 3, i32 1), !noalias !375\n  %5 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %3, i64 0, i32 15\n  %6 = load i32, ptr %5, align 4, !tbaa !9, !noalias !376\n  %7 = and i32 %6, 256\n  %8 = icmp eq i32 %7, 0\n  br i1 %8, label %41, label %9\n\n9:                                                ; preds = %4\n  %10 = load &lt;16 x float&gt;, ptr %0, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %11 = getelementptr inbounds float, ptr %0, i64 16\n  %12 = load &lt;16 x float&gt;, ptr %11, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %13 = getelementptr inbounds float, ptr %0, i64 32\n  %14 = load &lt;16 x float&gt;, ptr %13, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %15 = getelementptr inbounds float, ptr %0, i64 48\n  %16 = load &lt;16 x float&gt;, ptr %15, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %17 = getelementptr inbounds float, ptr %0, i64 64\n  %18 = load &lt;16 x float&gt;, ptr %17, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %19 = getelementptr inbounds float, ptr %0, i64 80\n  %20 = load &lt;16 x float&gt;, ptr %19, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %21 = getelementptr inbounds float, ptr %0, i64 96\n  %22 = load &lt;16 x float&gt;, ptr %21, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %23 = getelementptr inbounds float, ptr %0, i64 112\n  %24 = load &lt;16 x float&gt;, ptr %23, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %25 = getelementptr inbounds 
float, ptr %0, i64 128\n  %26 = load &lt;16 x float&gt;, ptr %25, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %27 = getelementptr inbounds float, ptr %0, i64 144\n  %28 = load &lt;16 x float&gt;, ptr %27, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %29 = getelementptr inbounds float, ptr %0, i64 160\n  %30 = load &lt;16 x float&gt;, ptr %29, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %31 = getelementptr inbounds float, ptr %0, i64 176\n  %32 = load &lt;16 x float&gt;, ptr %31, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %33 = getelementptr inbounds float, ptr %0, i64 192\n  %34 = load &lt;16 x float&gt;, ptr %33, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %35 = getelementptr inbounds float, ptr %0, i64 208\n  %36 = load &lt;16 x float&gt;, ptr %35, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %37 = getelementptr inbounds float, ptr %0, i64 224\n  %38 = load &lt;16 x float&gt;, ptr %37, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %39 = getelementptr inbounds float, ptr %0, i64 240\n  %40 = load &lt;16 x float&gt;, ptr %39, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  br label %41\n\n41:                                               ; preds = %4, %9\n  %42 = phi &lt;16 x float&gt; [ %40, %9 ], [ zeroinitializer, %4 ]\n  %43 = phi &lt;16 x float&gt; [ %38, %9 ], [ zeroinitializer, %4 ]\n  %44 = phi &lt;16 x float&gt; [ %36, %9 ], [ zeroinitializer, %4 ]\n  %45 = phi &lt;16 x float&gt; [ %34, %9 ], [ zeroinitializer, %4 ]\n  %46 = phi &lt;16 x float&gt; [ %32, %9 ], [ zeroinitializer, %4 ]\n  %47 = phi &lt;16 x float&gt; [ %30, %9 ], [ zeroinitializer, %4 ]\n  %48 = phi &lt;16 x float&gt; [ %28, %9 ], [ zeroinitializer, %4 ]\n  %49 = phi &lt;16 x float&gt; [ %26, %9 ], [ zeroinitializer, %4 ]\n  %50 = phi &lt;16 x float&gt; [ %24, %9 ], [ zeroinitializer, %4 ]\n  %51 = phi &lt;16 x float&gt; [ %22, %9 ], [ zeroinitializer, %4 ]\n  %52 = phi &lt;16 x float&gt; [ %20, %9 ], [ 
zeroinitializer, %4 ]\n  %53 = phi &lt;16 x float&gt; [ %18, %9 ], [ zeroinitializer, %4 ]\n  %54 = phi &lt;16 x float&gt; [ %16, %9 ], [ zeroinitializer, %4 ]\n  %55 = phi &lt;16 x float&gt; [ %14, %9 ], [ zeroinitializer, %4 ]\n  %56 = phi &lt;16 x float&gt; [ %12, %9 ], [ zeroinitializer, %4 ]\n  %57 = phi &lt;16 x float&gt; [ %10, %9 ], [ zeroinitializer, %4 ]\n  %58 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %3, i64 0, i32 11\n  %59 = load i64, ptr %58, align 8, !tbaa !19, !noalias !376\n  %60 = icmp sgt i64 %59, 0\n  br i1 %60, label %61, label %167\n\n61:                                               ; preds = %41, %61\n  %62 = phi &lt;16 x float&gt; [ %161, %61 ], [ %42, %41 ]\n  %63 = phi &lt;16 x float&gt; [ %156, %61 ], [ %43, %41 ]\n  %64 = phi &lt;16 x float&gt; [ %151, %61 ], [ %44, %41 ]\n  %65 = phi &lt;16 x float&gt; [ %146, %61 ], [ %45, %41 ]\n  %66 = phi &lt;16 x float&gt; [ %141, %61 ], [ %46, %41 ]\n  %67 = phi &lt;16 x float&gt; [ %136, %61 ], [ %47, %41 ]\n  %68 = phi &lt;16 x float&gt; [ %131, %61 ], [ %48, %41 ]\n  %69 = phi &lt;16 x float&gt; [ %126, %61 ], [ %49, %41 ]\n  %70 = phi &lt;16 x float&gt; [ %121, %61 ], [ %50, %41 ]\n  %71 = phi &lt;16 x float&gt; [ %116, %61 ], [ %51, %41 ]\n  %72 = phi &lt;16 x float&gt; [ %111, %61 ], [ %52, %41 ]\n  %73 = phi &lt;16 x float&gt; [ %106, %61 ], [ %53, %41 ]\n  %74 = phi &lt;16 x float&gt; [ %101, %61 ], [ %54, %41 ]\n  %75 = phi &lt;16 x float&gt; [ %96, %61 ], [ %55, %41 ]\n  %76 = phi &lt;16 x float&gt; [ %91, %61 ], [ %56, %41 ]\n  %77 = phi &lt;16 x float&gt; [ %86, %61 ], [ %57, %41 ]\n  %78 = phi i64 [ %165, %61 ], [ 0, %41 ]\n  %79 = phi ptr [ %164, %61 ], [ %1, %41 ]\n  %80 = phi ptr [ %162, %61 ], [ %2, %41 ]\n  %81 = load &lt;16 x float&gt;, ptr %80, align 1, !tbaa !17, !alias.scope !372, !noalias !375\n  %82 = getelementptr inbounds float, ptr %80, i64 128\n  tail call void @llvm.prefetch.p0(ptr nonnull %82, i32 0, i32 3, i32 1), !noalias !375\n  %83 = load 
float, ptr %79, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %84 = insertelement &lt;16 x float&gt; poison, float %83, i64 0\n  %85 = shufflevector &lt;16 x float&gt; %84, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %86 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %85, &lt;16 x float&gt; %81, &lt;16 x float&gt; %77)\n  %87 = getelementptr inbounds float, ptr %79, i64 1\n  %88 = load float, ptr %87, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %89 = insertelement &lt;16 x float&gt; poison, float %88, i64 0\n  %90 = shufflevector &lt;16 x float&gt; %89, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %91 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %90, &lt;16 x float&gt; %81, &lt;16 x float&gt; %76)\n  %92 = getelementptr inbounds float, ptr %79, i64 2\n  %93 = load float, ptr %92, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %94 = insertelement &lt;16 x float&gt; poison, float %93, i64 0\n  %95 = shufflevector &lt;16 x float&gt; %94, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %96 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %95, &lt;16 x float&gt; %81, &lt;16 x float&gt; %75)\n  %97 = getelementptr inbounds float, ptr %79, i64 3\n  %98 = load float, ptr %97, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %99 = insertelement &lt;16 x float&gt; poison, float %98, i64 0\n  %100 = shufflevector &lt;16 x float&gt; %99, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %101 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %100, &lt;16 x float&gt; %81, &lt;16 x float&gt; %74)\n  %102 = getelementptr inbounds float, ptr %79, i64 4\n  %103 = load float, ptr %102, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %104 = insertelement &lt;16 x float&gt; poison, float %103, i64 0\n  %105 = shufflevector &lt;16 x float&gt; %104, &lt;16 x float&gt; poison, &lt;16 x i32&gt; 
zeroinitializer\n  %106 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %105, &lt;16 x float&gt; %81, &lt;16 x float&gt; %73)\n  %107 = getelementptr inbounds float, ptr %79, i64 5\n  %108 = load float, ptr %107, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %109 = insertelement &lt;16 x float&gt; poison, float %108, i64 0\n  %110 = shufflevector &lt;16 x float&gt; %109, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %111 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %110, &lt;16 x float&gt; %81, &lt;16 x float&gt; %72)\n  %112 = getelementptr inbounds float, ptr %79, i64 6\n  %113 = load float, ptr %112, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %114 = insertelement &lt;16 x float&gt; poison, float %113, i64 0\n  %115 = shufflevector &lt;16 x float&gt; %114, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %116 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %115, &lt;16 x float&gt; %81, &lt;16 x float&gt; %71)\n  %117 = getelementptr inbounds float, ptr %79, i64 7\n  %118 = load float, ptr %117, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %119 = insertelement &lt;16 x float&gt; poison, float %118, i64 0\n  %120 = shufflevector &lt;16 x float&gt; %119, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %121 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %120, &lt;16 x float&gt; %81, &lt;16 x float&gt; %70)\n  %122 = getelementptr inbounds float, ptr %79, i64 8\n  %123 = load float, ptr %122, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %124 = insertelement &lt;16 x float&gt; poison, float %123, i64 0\n  %125 = shufflevector &lt;16 x float&gt; %124, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %126 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %125, &lt;16 x float&gt; %81, &lt;16 x float&gt; %69)\n  %127 = getelementptr inbounds float, ptr %79, i64 9\n  %128 
= load float, ptr %127, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %129 = insertelement &lt;16 x float&gt; poison, float %128, i64 0\n  %130 = shufflevector &lt;16 x float&gt; %129, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %131 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %130, &lt;16 x float&gt; %81, &lt;16 x float&gt; %68)\n  %132 = getelementptr inbounds float, ptr %79, i64 10\n  %133 = load float, ptr %132, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %134 = insertelement &lt;16 x float&gt; poison, float %133, i64 0\n  %135 = shufflevector &lt;16 x float&gt; %134, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %136 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %135, &lt;16 x float&gt; %81, &lt;16 x float&gt; %67)\n  %137 = getelementptr inbounds float, ptr %79, i64 11\n  %138 = load float, ptr %137, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %139 = insertelement &lt;16 x float&gt; poison, float %138, i64 0\n  %140 = shufflevector &lt;16 x float&gt; %139, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %141 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %140, &lt;16 x float&gt; %81, &lt;16 x float&gt; %66)\n  %142 = getelementptr inbounds float, ptr %79, i64 12\n  %143 = load float, ptr %142, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %144 = insertelement &lt;16 x float&gt; poison, float %143, i64 0\n  %145 = shufflevector &lt;16 x float&gt; %144, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %146 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %145, &lt;16 x float&gt; %81, &lt;16 x float&gt; %65)\n  %147 = getelementptr inbounds float, ptr %79, i64 13\n  %148 = load float, ptr %147, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %149 = insertelement &lt;16 x float&gt; poison, float %148, i64 0\n  %150 = shufflevector &lt;16 x float&gt; %149, &lt;16 
x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %151 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %150, &lt;16 x float&gt; %81, &lt;16 x float&gt; %64)\n  %152 = getelementptr inbounds float, ptr %79, i64 14\n  %153 = load float, ptr %152, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %154 = insertelement &lt;16 x float&gt; poison, float %153, i64 0\n  %155 = shufflevector &lt;16 x float&gt; %154, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %156 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %155, &lt;16 x float&gt; %81, &lt;16 x float&gt; %63)\n  %157 = getelementptr inbounds float, ptr %79, i64 15\n  %158 = load float, ptr %157, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %159 = insertelement &lt;16 x float&gt; poison, float %158, i64 0\n  %160 = shufflevector &lt;16 x float&gt; %159, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %161 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %160, &lt;16 x float&gt; %81, &lt;16 x float&gt; %62)\n  %162 = getelementptr inbounds float, ptr %80, i64 16\n  %163 = getelementptr inbounds float, ptr %79, i64 128\n  tail call void @llvm.prefetch.p0(ptr nonnull %163, i32 0, i32 3, i32 1), !noalias !374\n  %164 = getelementptr inbounds float, ptr %79, i64 16\n  %165 = add nuw nsw i64 %78, 1\n  %166 = icmp eq i64 %165, %59\n  br i1 %166, label %167, label %61, !llvm.loop !333\n\n167:                                              ; preds = %61, %41\n  %168 = phi &lt;16 x float&gt; [ %42, %41 ], [ %161, %61 ]\n  %169 = phi &lt;16 x float&gt; [ %43, %41 ], [ %156, %61 ]\n  %170 = phi &lt;16 x float&gt; [ %44, %41 ], [ %151, %61 ]\n  %171 = phi &lt;16 x float&gt; [ %45, %41 ], [ %146, %61 ]\n  %172 = phi &lt;16 x float&gt; [ %46, %41 ], [ %141, %61 ]\n  %173 = phi &lt;16 x float&gt; [ %47, %41 ], [ %136, %61 ]\n  %174 = phi &lt;16 x float&gt; [ %48, %41 ], [ %131, %61 ]\n  %175 = phi &lt;16 x float&gt; [ %49, 
%41 ], [ %126, %61 ]\n  %176 = phi &lt;16 x float&gt; [ %50, %41 ], [ %121, %61 ]\n  %177 = phi &lt;16 x float&gt; [ %51, %41 ], [ %116, %61 ]\n  %178 = phi &lt;16 x float&gt; [ %52, %41 ], [ %111, %61 ]\n  %179 = phi &lt;16 x float&gt; [ %53, %41 ], [ %106, %61 ]\n  %180 = phi &lt;16 x float&gt; [ %54, %41 ], [ %101, %61 ]\n  %181 = phi &lt;16 x float&gt; [ %55, %41 ], [ %96, %61 ]\n  %182 = phi &lt;16 x float&gt; [ %56, %41 ], [ %91, %61 ]\n  %183 = phi &lt;16 x float&gt; [ %57, %41 ], [ %86, %61 ]\n  store &lt;16 x float&gt; %183, ptr %0, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %184 = getelementptr inbounds float, ptr %0, i64 16\n  store &lt;16 x float&gt; %182, ptr %184, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %185 = getelementptr inbounds float, ptr %0, i64 32\n  store &lt;16 x float&gt; %181, ptr %185, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %186 = getelementptr inbounds float, ptr %0, i64 48\n  store &lt;16 x float&gt; %180, ptr %186, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %187 = getelementptr inbounds float, ptr %0, i64 64\n  store &lt;16 x float&gt; %179, ptr %187, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %188 = getelementptr inbounds float, ptr %0, i64 80\n  store &lt;16 x float&gt; %178, ptr %188, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %189 = getelementptr inbounds float, ptr %0, i64 96\n  store &lt;16 x float&gt; %177, ptr %189, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %190 = getelementptr inbounds float, ptr %0, i64 112\n  store &lt;16 x float&gt; %176, ptr %190, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %191 = getelementptr inbounds float, ptr %0, i64 128\n  store &lt;16 x float&gt; %175, ptr %191, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %192 = getelementptr inbounds float, ptr %0, i64 144\n  store &lt;16 x float&gt; %174, ptr %192, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %193 = getelementptr 
inbounds float, ptr %0, i64 160\n  store &lt;16 x float&gt; %173, ptr %193, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %194 = getelementptr inbounds float, ptr %0, i64 176\n  store &lt;16 x float&gt; %172, ptr %194, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %195 = getelementptr inbounds float, ptr %0, i64 192\n  store &lt;16 x float&gt; %171, ptr %195, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %196 = getelementptr inbounds float, ptr %0, i64 208\n  store &lt;16 x float&gt; %170, ptr %196, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %197 = getelementptr inbounds float, ptr %0, i64 224\n  store &lt;16 x float&gt; %169, ptr %197, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %198 = getelementptr inbounds float, ptr %0, i64 240\n  store &lt;16 x float&gt; %168, ptr %198, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  ret void\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#intermediate-file-optimizedbc-disassembled-to-optimizedll","title":"Intermediate file: <code>...optimized.bc</code>, disassembled to <code>...optimized.ll</code>","text":"<pre><code>; Function Attrs: nofree norecurse nosync nounwind\ndefine internal noundef i32 @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32(ptr noalias nocapture nonnull readonly align 16 %0, ptr noalias nocapture nonnull readonly align 16 %1, ptr noalias nocapture nonnull readonly align 16 %2) #1 !dbg !90 {\n  %.elt7 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 4, !dbg !91\n  %.unpack8 = load i32, ptr %.elt7, align 4, !dbg !91\n  %.elt9 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 5, !dbg !91\n  %.unpack10 = load i32, ptr %.elt9, align 16, !dbg !91\n  %.elt17 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 9, !dbg !91\n  %.unpack18 = load ptr, ptr %.elt17, 
align 8, !dbg !91\n  %.elt19 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 10, !dbg !91\n  %.unpack20 = load ptr, ptr %.elt19, align 16, !dbg !91\n  %4 = getelementptr i32, ptr %.unpack18, i64 4, !dbg !91\n  %5 = load i64, ptr %4, align 4, !dbg !91\n  %6 = getelementptr i32, ptr %.unpack18, i64 6, !dbg !91\n  %7 = load i32, ptr %6, align 4, !dbg !91\n  %8 = getelementptr i32, ptr %.unpack18, i64 7, !dbg !91\n  %9 = load i32, ptr %8, align 4, !dbg !91\n  %10 = getelementptr i32, ptr %.unpack18, i64 8, !dbg !91\n  %11 = load i64, ptr %10, align 4, !dbg !91\n  %12 = getelementptr i32, ptr %.unpack18, i64 10, !dbg !91\n  %13 = load i64, ptr %12, align 4, !dbg !91\n  %14 = shl i64 %13, 4, !dbg !91\n  %15 = getelementptr i32, ptr %.unpack18, i64 14, !dbg !91\n  %16 = load i64, ptr %15, align 4, !dbg !91\n  %17 = shl i64 %16, 8, !dbg !91\n  %18 = zext i32 %7 to i64, !dbg !91\n  %19 = zext i32 %9 to i64, !dbg !91\n  %20 = shl nuw i64 %19, 32, !dbg !91\n  %21 = or disjoint i64 %20, %18, !dbg !91\n  %22 = load ptr, ptr %.unpack20, align 8, !dbg !91\n  %23 = getelementptr ptr, ptr %.unpack20, i64 1, !dbg !91\n  %24 = load ptr, ptr %23, align 8, !dbg !91\n  %25 = load %iree_hal_executable_workgroup_state_v0_t.20, ptr %2, align 16, !dbg !91\n  %26 = extractvalue %iree_hal_executable_workgroup_state_v0_t.20 %25, 0, !dbg !91\n  %27 = zext i32 %26 to i64, !dbg !91\n  %28 = zext i32 %.unpack8 to i64, !dbg !91\n  %29 = extractvalue %iree_hal_executable_workgroup_state_v0_t.20 %25, 1, !dbg !91\n  %30 = zext i32 %29 to i64, !dbg !91\n  %31 = zext i32 %.unpack10 to i64, !dbg !91\n  %32 = icmp sgt i64 %5, %30, !dbg !91\n  br i1 %32, label %.preheader.lr.ph, label %._crit_edge58, !dbg !91\n\n.preheader.lr.ph:                                 ; preds = %3\n  %33 = getelementptr i32, ptr %.unpack18, i64 3, !dbg !91\n  %34 = load i32, ptr %33, align 4, !dbg !91\n  %35 = zext i32 %34 to i64, !dbg !91\n  %36 = shl nuw i64 %35, 32, !dbg !91\n  %37 = 
getelementptr i32, ptr %.unpack18, i64 2, !dbg !91\n  %38 = load i32, ptr %37, align 4, !dbg !91\n  %39 = zext i32 %38 to i64, !dbg !91\n  %40 = or disjoint i64 %36, %39, !dbg !91\n  %41 = getelementptr i32, ptr %.unpack18, i64 1, !dbg !91\n  %42 = load i32, ptr %41, align 4, !dbg !91\n  %43 = zext i32 %42 to i64, !dbg !91\n  %44 = shl nuw i64 %43, 32, !dbg !91\n  %45 = load i32, ptr %.unpack18, align 4, !dbg !91\n  %46 = zext i32 %45 to i64, !dbg !91\n  %47 = or disjoint i64 %44, %46, !dbg !91\n  %48 = icmp sgt i64 %11, %27\n  %.lobit = ashr i64 %44, 63\n  %49 = xor i64 %47, %.lobit\n  %50 = sdiv i64 %49, 4\n  %51 = xor i64 %50, %.lobit\n  %.lobit24 = ashr i64 %36, 63\n  %52 = xor i64 %40, %.lobit24\n  %53 = sdiv i64 %52, 4\n  %54 = xor i64 %53, %.lobit24\n  %55 = icmp eq i64 %21, 0\n  %56 = shl i64 %21, 9\n  %57 = icmp sgt i64 %21, 0\n  br label %.preheader, !dbg !91\n\n.preheader:                                       ; preds = %._crit_edge, %.preheader.lr.ph\n  %58 = phi i64 [ %30, %.preheader.lr.ph ], [ %228, %._crit_edge ]\n  br i1 %48, label %.lr.ph, label %._crit_edge, !dbg !91\n\n.lr.ph:                                           ; preds = %.preheader\n  %59 = mul i64 %17, %58\n  %60 = add i64 %59, %54\n  %61 = mul i64 %56, %58\n  %62 = ashr exact i64 %61, 3\n  %63 = getelementptr inbounds i8, ptr %22, i64 %62\n  %64 = shl i64 %60, 2\n  %invariant.gep = getelementptr i8, ptr %24, i64 %64, !dbg !91\n  br label %65, !dbg !91\n\n65:                                               ; preds = %iree_uk_mmt4d.exit, %.lr.ph\n  %66 = phi i64 [ %27, %.lr.ph ], [ %226, %iree_uk_mmt4d.exit ]\n  br i1 %55, label %iree_uk_mmt4d.exit, label %67, !dbg !91\n\n67:                                               ; preds = %65\n  %68 = mul i64 %14, %66, !dbg !91\n  %69 = add i64 %68, %51, !dbg !91\n  %70 = shl i64 %69, 5, !dbg !91\n  %71 = ashr exact i64 %70, 3, !dbg !91\n  %72 = getelementptr inbounds i8, ptr %22, i64 %71, !dbg !91\n  %73 = shl i64 %66, 10, !dbg !91\n  %gep = 
getelementptr i8, ptr %invariant.gep, i64 %73, !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %gep, i32 1, i32 1, i32 1), !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %63, i32 0, i32 3, i32 1), !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %72, i32 0, i32 3, i32 1), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !92), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !95), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !97), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !99), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !102), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !104), !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %63, i32 0, i32 3, i32 1), !dbg !91, !noalias !106\n  tail call void @llvm.prefetch.p0(ptr %72, i32 0, i32 3, i32 1), !dbg !91, !noalias !107\n  %74 = load &lt;16 x float&gt;, ptr %gep, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %75 = getelementptr inbounds float, ptr %gep, i64 16, !dbg !91\n  %76 = load &lt;16 x float&gt;, ptr %75, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %77 = getelementptr inbounds float, ptr %gep, i64 32, !dbg !91\n  %78 = load &lt;16 x float&gt;, ptr %77, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %79 = getelementptr inbounds float, ptr %gep, i64 48, !dbg !91\n  %80 = load &lt;16 x float&gt;, ptr %79, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %81 = getelementptr inbounds float, ptr %gep, i64 64, !dbg !91\n  %82 = load &lt;16 x float&gt;, ptr %81, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %83 = getelementptr inbounds float, ptr %gep, i64 80, !dbg !91\n  %84 = load &lt;16 x float&gt;, ptr %83, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %85 = getelementptr inbounds float, ptr %gep, i64 96, !dbg !91\n  %86 = 
load &lt;16 x float&gt;, ptr %85, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %87 = getelementptr inbounds float, ptr %gep, i64 112, !dbg !91\n  %88 = load &lt;16 x float&gt;, ptr %87, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %89 = getelementptr inbounds float, ptr %gep, i64 128, !dbg !91\n  %90 = load &lt;16 x float&gt;, ptr %89, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %91 = getelementptr inbounds float, ptr %gep, i64 144, !dbg !91\n  %92 = load &lt;16 x float&gt;, ptr %91, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %93 = getelementptr inbounds float, ptr %gep, i64 160, !dbg !91\n  %94 = load &lt;16 x float&gt;, ptr %93, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %95 = getelementptr inbounds float, ptr %gep, i64 176, !dbg !91\n  %96 = load &lt;16 x float&gt;, ptr %95, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %97 = getelementptr inbounds float, ptr %gep, i64 192, !dbg !91\n  %98 = load &lt;16 x float&gt;, ptr %97, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %99 = getelementptr inbounds float, ptr %gep, i64 208, !dbg !91\n  %100 = load &lt;16 x float&gt;, ptr %99, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %101 = getelementptr inbounds float, ptr %gep, i64 224, !dbg !91\n  %102 = load &lt;16 x float&gt;, ptr %101, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %103 = getelementptr inbounds float, ptr %gep, i64 240, !dbg !91\n  %104 = load &lt;16 x float&gt;, ptr %103, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  br i1 %57, label %.preheader.i, label %iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit, !dbg !91\n\n.preheader.i:                                     ; preds = %.preheader.i, %67\n  %105 = phi &lt;16 x float&gt; [ %204, %.preheader.i ], [ %104, %67 ], !dbg !91\n  %106 = phi &lt;16 x float&gt; [ %199, 
%.preheader.i ], [ %102, %67 ], !dbg !91\n  %107 = phi &lt;16 x float&gt; [ %194, %.preheader.i ], [ %100, %67 ], !dbg !91\n  %108 = phi &lt;16 x float&gt; [ %189, %.preheader.i ], [ %98, %67 ], !dbg !91\n  %109 = phi &lt;16 x float&gt; [ %184, %.preheader.i ], [ %96, %67 ], !dbg !91\n  %110 = phi &lt;16 x float&gt; [ %179, %.preheader.i ], [ %94, %67 ], !dbg !91\n  %111 = phi &lt;16 x float&gt; [ %174, %.preheader.i ], [ %92, %67 ], !dbg !91\n  %112 = phi &lt;16 x float&gt; [ %169, %.preheader.i ], [ %90, %67 ], !dbg !91\n  %113 = phi &lt;16 x float&gt; [ %164, %.preheader.i ], [ %88, %67 ], !dbg !91\n  %114 = phi &lt;16 x float&gt; [ %159, %.preheader.i ], [ %86, %67 ], !dbg !91\n  %115 = phi &lt;16 x float&gt; [ %154, %.preheader.i ], [ %84, %67 ], !dbg !91\n  %116 = phi &lt;16 x float&gt; [ %149, %.preheader.i ], [ %82, %67 ], !dbg !91\n  %117 = phi &lt;16 x float&gt; [ %144, %.preheader.i ], [ %80, %67 ], !dbg !91\n  %118 = phi &lt;16 x float&gt; [ %139, %.preheader.i ], [ %78, %67 ], !dbg !91\n  %119 = phi &lt;16 x float&gt; [ %134, %.preheader.i ], [ %76, %67 ], !dbg !91\n  %120 = phi &lt;16 x float&gt; [ %129, %.preheader.i ], [ %74, %67 ], !dbg !91\n  %121 = phi i64 [ %208, %.preheader.i ], [ 0, %67 ], !dbg !91\n  %122 = phi ptr [ %207, %.preheader.i ], [ %63, %67 ], !dbg !91\n  %123 = phi ptr [ %205, %.preheader.i ], [ %72, %67 ], !dbg !91\n  %124 = load &lt;16 x float&gt;, ptr %123, align 1, !dbg !91, !tbaa !108, !alias.scope !113, !noalias !107\n  %125 = getelementptr inbounds float, ptr %123, i64 128, !dbg !91\n  tail call void @llvm.prefetch.p0(ptr nonnull %125, i32 0, i32 3, i32 1), !dbg !91, !noalias !107\n  %126 = load float, ptr %122, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %127 = insertelement &lt;16 x float&gt; poison, float %126, i64 0, !dbg !91\n  %128 = shufflevector &lt;16 x float&gt; %127, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %129 = tail call &lt;16 x float&gt; 
@llvm.fma.v16f32(&lt;16 x float&gt; %128, &lt;16 x float&gt; %124, &lt;16 x float&gt; %120), !dbg !91\n  %130 = getelementptr inbounds float, ptr %122, i64 1, !dbg !91\n  %131 = load float, ptr %130, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %132 = insertelement &lt;16 x float&gt; poison, float %131, i64 0, !dbg !91\n  %133 = shufflevector &lt;16 x float&gt; %132, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %134 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %133, &lt;16 x float&gt; %124, &lt;16 x float&gt; %119), !dbg !91\n  %135 = getelementptr inbounds float, ptr %122, i64 2, !dbg !91\n  %136 = load float, ptr %135, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %137 = insertelement &lt;16 x float&gt; poison, float %136, i64 0, !dbg !91\n  %138 = shufflevector &lt;16 x float&gt; %137, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %139 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %138, &lt;16 x float&gt; %124, &lt;16 x float&gt; %118), !dbg !91\n  %140 = getelementptr inbounds float, ptr %122, i64 3, !dbg !91\n  %141 = load float, ptr %140, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %142 = insertelement &lt;16 x float&gt; poison, float %141, i64 0, !dbg !91\n  %143 = shufflevector &lt;16 x float&gt; %142, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %144 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %143, &lt;16 x float&gt; %124, &lt;16 x float&gt; %117), !dbg !91\n  %145 = getelementptr inbounds float, ptr %122, i64 4, !dbg !91\n  %146 = load float, ptr %145, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %147 = insertelement &lt;16 x float&gt; poison, float %146, i64 0, !dbg !91\n  %148 = shufflevector &lt;16 x float&gt; %147, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %149 = tail call &lt;16 x 
float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %148, &lt;16 x float&gt; %124, &lt;16 x float&gt; %116), !dbg !91\n  %150 = getelementptr inbounds float, ptr %122, i64 5, !dbg !91\n  %151 = load float, ptr %150, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %152 = insertelement &lt;16 x float&gt; poison, float %151, i64 0, !dbg !91\n  %153 = shufflevector &lt;16 x float&gt; %152, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %154 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %153, &lt;16 x float&gt; %124, &lt;16 x float&gt; %115), !dbg !91\n  %155 = getelementptr inbounds float, ptr %122, i64 6, !dbg !91\n  %156 = load float, ptr %155, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %157 = insertelement &lt;16 x float&gt; poison, float %156, i64 0, !dbg !91\n  %158 = shufflevector &lt;16 x float&gt; %157, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %159 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %158, &lt;16 x float&gt; %124, &lt;16 x float&gt; %114), !dbg !91\n  %160 = getelementptr inbounds float, ptr %122, i64 7, !dbg !91\n  %161 = load float, ptr %160, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %162 = insertelement &lt;16 x float&gt; poison, float %161, i64 0, !dbg !91\n  %163 = shufflevector &lt;16 x float&gt; %162, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %164 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %163, &lt;16 x float&gt; %124, &lt;16 x float&gt; %113), !dbg !91\n  %165 = getelementptr inbounds float, ptr %122, i64 8, !dbg !91\n  %166 = load float, ptr %165, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %167 = insertelement &lt;16 x float&gt; poison, float %166, i64 0, !dbg !91\n  %168 = shufflevector &lt;16 x float&gt; %167, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %169 = tail call 
&lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %168, &lt;16 x float&gt; %124, &lt;16 x float&gt; %112), !dbg !91\n  %170 = getelementptr inbounds float, ptr %122, i64 9, !dbg !91\n  %171 = load float, ptr %170, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %172 = insertelement &lt;16 x float&gt; poison, float %171, i64 0, !dbg !91\n  %173 = shufflevector &lt;16 x float&gt; %172, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %174 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %173, &lt;16 x float&gt; %124, &lt;16 x float&gt; %111), !dbg !91\n  %175 = getelementptr inbounds float, ptr %122, i64 10, !dbg !91\n  %176 = load float, ptr %175, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %177 = insertelement &lt;16 x float&gt; poison, float %176, i64 0, !dbg !91\n  %178 = shufflevector &lt;16 x float&gt; %177, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %179 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %178, &lt;16 x float&gt; %124, &lt;16 x float&gt; %110), !dbg !91\n  %180 = getelementptr inbounds float, ptr %122, i64 11, !dbg !91\n  %181 = load float, ptr %180, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %182 = insertelement &lt;16 x float&gt; poison, float %181, i64 0, !dbg !91\n  %183 = shufflevector &lt;16 x float&gt; %182, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %184 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %183, &lt;16 x float&gt; %124, &lt;16 x float&gt; %109), !dbg !91\n  %185 = getelementptr inbounds float, ptr %122, i64 12, !dbg !91\n  %186 = load float, ptr %185, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %187 = insertelement &lt;16 x float&gt; poison, float %186, i64 0, !dbg !91\n  %188 = shufflevector &lt;16 x float&gt; %187, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %189 = 
tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %188, &lt;16 x float&gt; %124, &lt;16 x float&gt; %108), !dbg !91\n  %190 = getelementptr inbounds float, ptr %122, i64 13, !dbg !91\n  %191 = load float, ptr %190, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %192 = insertelement &lt;16 x float&gt; poison, float %191, i64 0, !dbg !91\n  %193 = shufflevector &lt;16 x float&gt; %192, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %194 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %193, &lt;16 x float&gt; %124, &lt;16 x float&gt; %107), !dbg !91\n  %195 = getelementptr inbounds float, ptr %122, i64 14, !dbg !91\n  %196 = load float, ptr %195, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %197 = insertelement &lt;16 x float&gt; poison, float %196, i64 0, !dbg !91\n  %198 = shufflevector &lt;16 x float&gt; %197, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %199 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %198, &lt;16 x float&gt; %124, &lt;16 x float&gt; %106), !dbg !91\n  %200 = getelementptr inbounds float, ptr %122, i64 15, !dbg !91\n  %201 = load float, ptr %200, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %202 = insertelement &lt;16 x float&gt; poison, float %201, i64 0, !dbg !91\n  %203 = shufflevector &lt;16 x float&gt; %202, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %204 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %203, &lt;16 x float&gt; %124, &lt;16 x float&gt; %105), !dbg !91\n  %205 = getelementptr inbounds float, ptr %123, i64 16, !dbg !91\n  %206 = getelementptr inbounds float, ptr %122, i64 128, !dbg !91\n  tail call void @llvm.prefetch.p0(ptr nonnull %206, i32 0, i32 3, i32 1), !dbg !91, !noalias !106\n  %207 = getelementptr inbounds float, ptr %122, i64 16, !dbg !91\n  %208 = add nuw nsw i64 %121, 1, !dbg !91\n  %209 = icmp 
eq i64 %208, %21, !dbg !91\n  br i1 %209, label %iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit, label %.preheader.i, !dbg !91, !llvm.loop !117\n\niree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit: ; preds = %.preheader.i, %67\n  %210 = phi &lt;16 x float&gt; [ %104, %67 ], [ %204, %.preheader.i ], !dbg !91\n  %211 = phi &lt;16 x float&gt; [ %102, %67 ], [ %199, %.preheader.i ], !dbg !91\n  %212 = phi &lt;16 x float&gt; [ %100, %67 ], [ %194, %.preheader.i ], !dbg !91\n  %213 = phi &lt;16 x float&gt; [ %98, %67 ], [ %189, %.preheader.i ], !dbg !91\n  %214 = phi &lt;16 x float&gt; [ %96, %67 ], [ %184, %.preheader.i ], !dbg !91\n  %215 = phi &lt;16 x float&gt; [ %94, %67 ], [ %179, %.preheader.i ], !dbg !91\n  %216 = phi &lt;16 x float&gt; [ %92, %67 ], [ %174, %.preheader.i ], !dbg !91\n  %217 = phi &lt;16 x float&gt; [ %90, %67 ], [ %169, %.preheader.i ], !dbg !91\n  %218 = phi &lt;16 x float&gt; [ %88, %67 ], [ %164, %.preheader.i ], !dbg !91\n  %219 = phi &lt;16 x float&gt; [ %86, %67 ], [ %159, %.preheader.i ], !dbg !91\n  %220 = phi &lt;16 x float&gt; [ %84, %67 ], [ %154, %.preheader.i ], !dbg !91\n  %221 = phi &lt;16 x float&gt; [ %82, %67 ], [ %149, %.preheader.i ], !dbg !91\n  %222 = phi &lt;16 x float&gt; [ %80, %67 ], [ %144, %.preheader.i ], !dbg !91\n  %223 = phi &lt;16 x float&gt; [ %78, %67 ], [ %139, %.preheader.i ], !dbg !91\n  %224 = phi &lt;16 x float&gt; [ %76, %67 ], [ %134, %.preheader.i ], !dbg !91\n  %225 = phi &lt;16 x float&gt; [ %74, %67 ], [ %129, %.preheader.i ], !dbg !91\n  store &lt;16 x float&gt; %225, ptr %gep, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %224, ptr %75, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %223, ptr %77, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %222, ptr %79, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x 
float&gt; %221, ptr %81, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %220, ptr %83, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %219, ptr %85, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %218, ptr %87, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %217, ptr %89, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %216, ptr %91, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %215, ptr %93, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %214, ptr %95, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %213, ptr %97, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %212, ptr %99, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %211, ptr %101, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %210, ptr %103, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  br label %iree_uk_mmt4d.exit, !dbg !91\n\niree_uk_mmt4d.exit:                               ; preds = %iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit, %65\n  %226 = add i64 %66, %28, !dbg !91\n  %227 = icmp slt i64 %226, %11, !dbg !91\n  br i1 %227, label %65, label %._crit_edge, !dbg !91\n\n._crit_edge:                                      ; preds = %iree_uk_mmt4d.exit, %.preheader\n  %228 = add i64 %58, %31, !dbg !91\n  %229 = icmp slt i64 %228, %5, !dbg !91\n  br i1 %229, label %.preheader, label %._crit_edge58, !dbg !91\n\n._crit_edge58:                                    ; preds = %._crit_edge, %3\n  ret i32 0, !dbg 
!91\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#x86-assembly","title":"x86 assembly","text":"<pre><code>  .section  .text.matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32,\"ax\",@progbits\n  .p2align  4, 0x90\n  .type  matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32,@function\nmatmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32:\n.Lfunc_begin3:\n  .loc  1 1 0 is_stmt 1\n  .cfi_startproc\n  push  rbp\n  .cfi_def_cfa_offset 16\n  .cfi_offset rbp, -16\n  mov  rbp, rsp\n  .cfi_def_cfa_register rbp\n.Ltmp6:\n  push  r15\n  push  r14\n  push  r13\n  push  r12\n  push  rbx\n  .cfi_offset rbx, -56\n  .cfi_offset r12, -48\n  .cfi_offset r13, -40\n  .cfi_offset r14, -32\n  .cfi_offset r15, -24\n  .loc  1 1 1 prologue_end\n  mov  rcx, qword ptr [rsi + 24]\n  mov  edi, dword ptr [rdx + 4]\n  mov  rax, qword ptr [rcx + 16]\n  mov  qword ptr [rbp - 48], rdi\n  mov  qword ptr [rbp - 112], rax\n  cmp  rax, rdi\n  jle  .LBB3_11\n  mov  eax, dword ptr [rsi + 16]\n  mov  edi, dword ptr [rsi + 12]\n  mov  r12, qword ptr [rsi + 32]\n  mov  rsi, qword ptr [rcx + 40]\n  mov  r9, qword ptr [rcx + 56]\n  mov  ebx, dword ptr [rcx + 4]\n  mov  r10d, dword ptr [rcx]\n  mov  r11, qword ptr [rcx + 24]\n  mov  r14, qword ptr [rcx + 32]\n  mov  r8, rsi\n  shl  r8, 4\n  mov  qword ptr [rbp - 104], rax\n  shl  r9, 8\n  mov  rax, qword ptr [r12 + 8]\n  shl  rbx, 32\n  mov  qword ptr [rbp - 128], r8\n  mov  r8d, dword ptr [rcx + 12]\n  mov  qword ptr [rbp - 96], r9\n  mov  r9d, dword ptr [rcx + 8]\n  or  r10, rbx\n  sar  rbx, 63\n  xor  r10, rbx\n  lea  r15, [r10 + 3]\n  mov  qword ptr [rbp - 80], rax\n  mov  eax, dword ptr [rdx]\n  shl  r8, 32\n  or  r9, r8\n  test  r10, r10\n  cmovns  r15, r10\n  sar  r8, 63\n  sar  r15, 2\n  xor  r9, r8\n  xor  r15, rbx\n  lea  rcx, [r9 + 3]\n  test  r9, r9\n  mov  qword ptr [rbp - 56], rax\n  cmovns  rcx, r9\n  imul  rax, rsi\n  mov  r9, qword ptr [r12]\n  imul  rsi, rdi\n  mov  qword 
ptr [rbp - 120], r15\n  sar  rcx, 2\n  xor  rcx, r8\n  shl  rax, 6\n  mov  qword ptr [rbp - 88], rcx\n  mov  rcx, r11\n  shl  rcx, 9\n  shl  rsi, 6\n  lea  rax, [rax + 4*r15]\n  mov  qword ptr [rbp - 72], rcx\n  mov  qword ptr [rbp - 64], rax\n  jmp  .LBB3_2\n  .p2align  4, 0x90\n.LBB3_10:\n  .loc  1 0 1 is_stmt 0\n  mov  rax, qword ptr [rbp - 48]\n  .loc  1 1 1\n  add  rax, qword ptr [rbp - 104]\n  mov  qword ptr [rbp - 48], rax\n  cmp  rax, qword ptr [rbp - 112]\n  jge  .LBB3_11\n.LBB3_2:\n  .loc  1 0 1\n  cmp  r14, qword ptr [rbp - 56]\n  .loc  1 1 1\n  jle  .LBB3_10\n  .loc  1 0 1\n  mov  rax, qword ptr [rbp - 96]\n  mov  rcx, qword ptr [rbp - 48]\n  mov  r10, qword ptr [rbp - 72]\n  mov  rdx, qword ptr [rbp - 80]\n  mov  r8, qword ptr [rbp - 64]\n  imul  rax, rcx\n  add  rax, qword ptr [rbp - 88]\n  imul  r10, rcx\n  sar  r10, 3\n  lea  r13, [r9 + r10]\n  .loc  1 1 1\n  lea  r15, [rdx + 4*rax]\n  mov  rax, qword ptr [rbp - 56]\n  jmp  .LBB3_4\n  .p2align  4, 0x90\n.LBB3_8:\n  add  rdx, r15\n  vmovups  zmmword ptr [rdx], zmm15\n  vmovups  zmmword ptr [rdx + 64], zmm14\n  vmovups  zmmword ptr [rdx + 128], zmm13\n  vmovups  zmmword ptr [rdx + 192], zmm12\n  vmovups  zmmword ptr [rdx + 256], zmm11\n  vmovups  zmmword ptr [rdx + 320], zmm10\n  vmovups  zmmword ptr [rdx + 384], zmm9\n  vmovups  zmmword ptr [rdx + 448], zmm8\n  vmovups  zmmword ptr [rdx + 512], zmm7\n  vmovups  zmmword ptr [rdx + 576], zmm6\n  vmovups  zmmword ptr [rdx + 640], zmm5\n  vmovups  zmmword ptr [rdx + 704], zmm4\n  vmovups  zmmword ptr [rdx + 768], zmm3\n  vmovups  zmmword ptr [rdx + 832], zmm2\n  vmovups  zmmword ptr [rdx + 896], zmm1\n  vmovups  zmmword ptr [rdx + 960], zmm0\n.LBB3_9:\n  add  rax, rdi\n  add  r8, rsi\n  cmp  rax, r14\n  jge  .LBB3_10\n.LBB3_4:\n  .loc  1 0 1\n  test  r11, r11\n  .loc  1 1 1\n  je  .LBB3_9\n  .loc  1 0 1\n  mov  rcx, qword ptr [rbp - 128]\n  .loc  1 1 1\n  mov  rdx, rax\n  shl  rdx, 10\n  prefetchw  byte ptr [r15 + rdx]\n  prefetcht0  byte ptr [r13]\n  
imul  rcx, rax\n  add  rcx, qword ptr [rbp - 120]\n  shl  rcx, 5\n  sar  rcx, 3\n  prefetcht0  byte ptr [r9 + rcx]\n  prefetcht0  byte ptr [r13]\n  prefetcht0  byte ptr [r9 + rcx]\n  vmovups  zmm15, zmmword ptr [r15 + rdx]\n  vmovups  zmm14, zmmword ptr [r15 + rdx + 64]\n  vmovups  zmm13, zmmword ptr [r15 + rdx + 128]\n  vmovups  zmm12, zmmword ptr [r15 + rdx + 192]\n  vmovups  zmm11, zmmword ptr [r15 + rdx + 256]\n  vmovups  zmm10, zmmword ptr [r15 + rdx + 320]\n  vmovups  zmm9, zmmword ptr [r15 + rdx + 384]\n  vmovups  zmm8, zmmword ptr [r15 + rdx + 448]\n  vmovups  zmm7, zmmword ptr [r15 + rdx + 512]\n  vmovups  zmm6, zmmword ptr [r15 + rdx + 576]\n  vmovups  zmm5, zmmword ptr [r15 + rdx + 640]\n  vmovups  zmm4, zmmword ptr [r15 + rdx + 704]\n  vmovups  zmm3, zmmword ptr [r15 + rdx + 768]\n  vmovups  zmm2, zmmword ptr [r15 + rdx + 832]\n  vmovups  zmm1, zmmword ptr [r15 + rdx + 896]\n  vmovups  zmm0, zmmword ptr [r15 + rdx + 960]\n  test  r11, r11\n  jle  .LBB3_8\n  .loc  1 0 1\n  lea  rcx, [8*r8]\n  mov  r12, r9\n  mov  rbx, r11\n  sar  rcx, 3\n  add  rcx, 512\n  .p2align  4, 0x90\n.LBB3_7:\n  .loc  1 1 1\n  vmovups  zmm16, zmmword ptr [r12 + rcx - 512]\n  prefetcht0  byte ptr [r12 + rcx]\n  vfmadd231ps  zmm15, zmm16, dword ptr [r12 + r10]{1to16}\n  vfmadd231ps  zmm14, zmm16, dword ptr [r12 + r10 + 4]{1to16}\n  vfmadd231ps  zmm13, zmm16, dword ptr [r12 + r10 + 8]{1to16}\n  vfmadd231ps  zmm12, zmm16, dword ptr [r12 + r10 + 12]{1to16}\n  vfmadd231ps  zmm11, zmm16, dword ptr [r12 + r10 + 16]{1to16}\n  vfmadd231ps  zmm10, zmm16, dword ptr [r12 + r10 + 20]{1to16}\n  vfmadd231ps  zmm9, zmm16, dword ptr [r12 + r10 + 24]{1to16}\n  vfmadd231ps  zmm8, zmm16, dword ptr [r12 + r10 + 28]{1to16}\n  vfmadd231ps  zmm7, zmm16, dword ptr [r12 + r10 + 32]{1to16}\n  vfmadd231ps  zmm6, zmm16, dword ptr [r12 + r10 + 36]{1to16}\n  vfmadd231ps  zmm5, zmm16, dword ptr [r12 + r10 + 40]{1to16}\n  vfmadd231ps  zmm4, zmm16, dword ptr [r12 + r10 + 44]{1to16}\n  vfmadd231ps  zmm3, zmm16, 
dword ptr [r12 + r10 + 48]{1to16}\n  vfmadd231ps  zmm2, zmm16, dword ptr [r12 + r10 + 52]{1to16}\n  vfmadd231ps  zmm1, zmm16, dword ptr [r12 + r10 + 56]{1to16}\n  vfmadd231ps  zmm0, zmm16, dword ptr [r12 + r10 + 60]{1to16}\n  prefetcht0  byte ptr [r12 + r10 + 512]\n  add  r12, 64\n  dec  rbx\n  jne  .LBB3_7\n  jmp  .LBB3_8\n.LBB3_11:\n  xor  eax, eax\n  .loc  1 1 1 epilogue_begin\n  pop  rbx\n  pop  r12\n  pop  r13\n  pop  r14\n  pop  r15\n  pop  rbp\n  .cfi_def_cfa rsp, 8\n  vzeroupper\n  ret\n.Ltmp7:\n.Lfunc_end3:\n  .size  matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32, .Lfunc_end3-matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32\n  .cfi_endproc\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/","title":"Matrix Multiplication with MMT4D","text":"","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#introduction","title":"Introduction","text":"<p>Matrix multiplication (matmul) is an important operation in ML workloads that poses specific challenges to code generation. For example, matmul makes repeated accesses to the same data, which makes locality of reference a top concern.</p> <p>Moreover, modern CPUs instruction set architectures (ISAs) offer specialized SIMD instructions that the matmul implementation needs to use to achieve optimal performance, and these instructions expect data to be in a particular layout.</p> <p>This article is about an in-development MLIR operation, <code>linalg.mmt4d</code>, offering a compilation path for <code>linalg.matmul</code> that is designed from the ground up for these efficiency considerations.</p> <p>We are still in the early implementation phase of this <code>linalg.mmt4d</code> plan, but we feel confident that we know where we are going because what we are really doing here is importing into the compiler what we have learned working on optimized matrix multiplication libraries, particularly Ruy. 
We know what loop schedule and kernel we want the compiler to generate \u2014 essentially the same as we wrote in Ruy, give or take additional optimizations such as fusions and constant folding that become possible now that we are doing this within a compiler. This allows us to focus on how we get the compiler to generate that schedule and kernel with purely algebraic transformations that compose and enable further compiler optimizations.</p> <p>At the basis of this work is the extensible op system of the Linalg dialect in the MLIR compiler toolkit. In this case, a general purpose, mixed precision mmt4d op is defined via a high level description directly in the compiler and is then available to both users of the compiler (as a <code>linalg.mmt4d</code> op) or for direct emission via Python based IR construction (i.e. for direct integration into high level frameworks without rebuilding the compiler). The ability to define such new special forms cheaply, and without any systemic framework level cost, is part of the extensibility and composition story that we expect will become increasingly important in development and deployment scenarios in the future, and in this case, it let us springboard off of high quality code generation which was already well integrated and composed well with other features of the compiler.</p>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#existing-matrix-multplication-code-generation","title":"Existing Matrix Multiplication Code Generation","text":"<p>Let us start by discussing IREE\u2019s existing matmul code generation and highlight the issues that <code>mmt4d</code> aims to overcome.</p> <p>The existing approach operates in-place on the source matrices. When we discuss \"tiling\" in this paragraph, we refer exclusively to the traversal \u2014 how these source matrices are traversed by the matmul loop. 
There is no \"tiled layout\" here, which will be the key difference with <code>mmt4d</code> below.</p> <p>The destination matrix is tiled into workgroups (CPU threads) tiles, then each workgroup tile is tiled to fit some level of CPU cache, and finally each tile is further tiled to fit target architecture registers (e.g. 8x8).</p> <p>That multi-level tiling means that the code works like the following loop nest:</p> <pre><code>def tiled_matmul(A, B, C, tile_m, tile_n, tile_k, tile_m_v, tile_n_v, tile_k_v):\n m = A.shape[0]\n k = A.shape[1]\n n = B.shape[1]\n for m1 in range(0, m, tile_m):\n   for n1 in range(0, n, tile_n):\n     for k1 in range(0, k, tile_k):\n       # First level of tiling views...\n       lhs_tile = A[m1:m1+tile_m, k1:k1+tile_k]\n       rhs_tile = B[k1:k1+tile_k, n1:n1+tile_n]\n       dst_tile = C[m1:m1+tile_m, n1:n1+tile_n]\n       for mv in range(0, tile_m, tile_m_v):\n         for nv in range(0, tile_n, tile_n_v):\n           for kv in range(0, tile_k, tile_k_v):\n             # Register tiling views...\n             lhs_tile_v = lhs_tile[mv:mv+tile_m_v, kv:kv+tile_k_v]\n             rhs_tile_v = rhs_tile[kv:kv+tile_k_v, nv:nv+tile_n_v]\n             # kernel.\n             dst_tile[mv:mv+tile_m_v, nv:nv+tile_n_v] += np.matmul(lhs_tile_v, rhs_tile_v)\n return C\n</code></pre> <p>The two main problems with this approach are:</p> <ul> <li> <p>Overhead to meet SIMD ISA layout requirements: In practice, the kernel     needs to use specific SIMD     instructions to perform the arithmetic. They expect small tiles of the     matrices to be loaded in registers, in a specific layout. If the matrix data     wasn't already stored in memory in such a tiled layout, then the kernel has     to perform such a data rearrangement on the fly, incurring substantial     overhead. 
For NxN matrix multiplication, the kernel performs     O(N<sup>3</sup>) work on O(N<sup>2</sup>) data, so doing that rearrangement     there means O(N<sup>3</sup>) overhead where O(N<sup>2</sup>) should have     sufficed, as this could have been done as a pre-processing step on     O(N<sup>2</sup>) data.</p> </li> <li> <p>Inefficient memory traversal: For efficiency reasons, we always need     <code>tile_m_v&gt;1</code> and <code>tile_n_v&gt;1</code>. That is because the higher these values, the     fewer memory-load instructions are needed overall; and this is also dictated     by the SIMD instructions that we want to use. But that means that the kernel     is accessing simultaneously multiple rows or columns of the left-hand and     right-hand side matrices. And in this existing approach, they are stored in     linear layout, not in a tiled layout, so these accesses are not contiguous     in memory. This is detrimental to memory access performance, meaning the     CPU caches, in multiple ways. One     is that these multiple non-contiguous accesses may alias each other in the     L1 cache because of low     associativity.</p> </li> </ul>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#matrix-multiplication-operation-with-4d-tiled-operands","title":"Matrix Multiplication Operation With 4D Tiled Operands","text":"<p>For the reasons above, an efficient matmul implementation must reorder data into a tiled layout matching the target SIMD ISA and making the memory access patterns as contiguous as possible.</p> <p>IREE/MLIR defaults to bufferizing all tensors into a \"row-major\" order, meaning that the last-enumerated dimension is the one that is contiguous in memory. As we prefer not to write custom bufferization code, we can't specify an alternative layout for a tensor. Fortunately, it is possible to represent a 2D tiled layout as a 4D layout. 
For example, <code>tensor&lt;2x2x2x2xf32&gt;</code> can represent a 4x4 matrix made of 2x2 tiles, each of which is 2x2. The row-major layout on <code>tensor&lt;2x2x2x2xf32&gt;</code> makes each 2x2 tile contiguous and row-major, and arranges the 2x2 tiles themselves into a row-major 2x2 layout in the overall 4x4 matrix.</p> <p>Such a row-major-tiled layout is exactly what we need for the left-hand side of a matrix multiplication, because matrix multiplication traverses the left-hand side matrix row by row. But for the right-hand side matrix, we want a column-major-tiled layout. To solve this problem, we decide to implement not matrix multiplication, but matrix-multiplication-by-transposed-right-hand-side which is where the <code>t</code> in the <code>linalg.mmt4d</code> came from. Now such an op is happy with both the left and right-hand sides being row-major-tiled.</p> <p>The following example illustrates that. In these diagrams, each matrix element is rendered as its memory offset.</p> <p></p> <p>To compute the 2x2 block in the destination matrix, we will have to load two yellow blocks from the LHS and RHS matrices respectively, compute their matmul results (i.e. call the kernel), then the two blue blocks, and so on. As we can see, each tile loads data that is not contiguous. It would be better if we rearranged the elements in the following layout:</p> <p></p> <p>Now tiles are stored contiguously in memory and the kernel can simply load them from memory into the registers that will be directly consumed by the SIMD instructions performing the multiplications. Moreover, the kernel is now loading from just two contiguous data streams, a simple memory access pattern which is sure to be efficient (regarding caches, etc) on any reasonable target hardware.</p> <p>We introduce a <code>linalg.mmt4d</code> operation that performs such a matrix multiplication on matrices in a tiled layout represented as 4D tensors. 
That leaves the question of how to represent, within the linalg dialect, the conversions between ordinary matrices represented as 2D tensors, and these tiled matrices represented as 4D tensors. Moreover, these conversions should be tileable and decompose well. Thankfully, the transformation from 2D to 4D can be written as a reshape followed by a transpose as in the following diagram:</p> <p></p> <p>So we can think of the outermost two dimensions of the 4D representations as the tile position in the overall matrix, and the innermost two as the element position within one tile. Hopefully the following Python pseudocode makes it more concrete:</p> <pre><code>def pack_2d_4d(operand, parallel_size, reduction_size):\n i1 = operand.shape[0] // parallel_size # M1\n i2 = parallel_size    # M0\n j1 = operand.shape[1] // reduction_size # K1\n j2 = reduction_size   # K0\n operand_4d = np.reshape(operand, [i1, i2, j1, j2])\n return np.transpose(operand_4d, [0, 2, 1, 3]) # [M1, K1, M0, K0]\n</code></pre> <p>Now the mmt4d operation will follow a structure as the multi level tiling, for simplicity we considered the case here where no L1 tiling is required only first level of distribution to workgroups:</p> <pre><code>def mmt4d(A, B, C, M0, N0, K0):\n M = A.shape[0]\n N = B.shape[1]\n Bt = np.transpose(B, [1, 0])\n A4d = pack_2d_4d(A, M0, K0)\n Bt4d = pack_2d_4d(Bt, N0, K0)\n M1 = A4d.shape[0]\n N1 = Bt4d.shape[0]\n K1 = A4d.shape[1]\n for m1 in range(0, M1):\n   for n1 in range(0, N1):\n     for k1 in range(0, K1):\n       # Tile views that are contiguous in memory.\n       lhs_tile = np.reshape(A4d[m1, k1, :, :], [M0, K0])\n       rhs_tile = np.reshape(Bt4d[n1, k1, :, :], [N0, K0])\n       # Inner kernel.\n       C[m1, n1, :, :] += np.matmul(lhs_tile, np.transpose(rhs_tile, [1, 0]))\n # 4d -&gt; 2D\n C2d = unpack_4d_2d(C)\n return C2d\n</code></pre> <p>The resulting 4D tiled matrix still needs to be rearranged back to the original layout as a 2D tensor:</p> <pre><code>def 
unpack_4d_2d(operand):\n i1 = operand.shape[0] # M1\n j1 = operand.shape[1] # N1\n i2 = operand.shape[2] # M0\n j2 = operand.shape[3] # N0\n operand_transposed = operand.transpose([0, 2, 1, 3]) # [M1, M0, N1, N0]\n return operand_transposed.reshape([i1 * i2, j1 * j2]) # [M, N]\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#performance-results","title":"Performance Results","text":"<p>We benchmarked various float32 matmul problems of different sizes and the result showed that mmt4d is faster than the existing matmul implementation for bigger matrices as we can see in the following chart:</p> <p></p> <p>The SIMD instruction being used here is the simplest kind, a <code>vector*scalar</code> multiplication, and the storage orders of the matrices allow the existing implementation to directly load the vectors from the source matrices without any rearrangement overhead. So this case is particularly friendly to the existing code, which is why the mmt4d code is only faster for bigger matrices. 
To understand why mmt4d is faster in that case, we collected statistics of L1 cache misses:</p> <p></p> <p>This shows that in this case, the better cache-friendliness of mmt4d, thanks to its simple contiguous memory access pattern, accounts for its higher performance.</p> <p>As we proceed with increasingly sophisticated SIMD targets, starting with the dot-product instructions found in current mobile devices for the int8 case and going to become generalized to all data types all the way to float32 over the next few years with upcoming ARM SIMD instructions, the advantage of mmt4d will widen for all sizes, not just the larger ones.</p> <p>Part of why we feel confident about the eventual performance that our approach will achieve is that, as mentioned in the introduction, we are rebuilding within the compiler an existing library's schedule and kernel, and we have benchmark results about it.</p>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#conclusion","title":"Conclusion","text":"<p>We introduced a 4d tiled representation for 2d matrix-matrix multiplication with a decomposable algebraic transformation that requires only reshape and transpose of input operands, we discussed and empirically showed how that solves major drawbacks in row-major linear matmul by providing a flexible way to match different ISA layout along with better cache locality achieving near peak performance.</p> <p>As was mentioned in the introduction, this work is under active development and the next immediate steps are to prove the rest of the hypothesis by:</p> <ul> <li> <p>Handling dynamic sizes and padding to the next multiple of the target tile   size.</p> </li> <li> <p>Implementing the integer case (<code>int32 += int8 * int8</code>).</p> </li> <li> <p>Implementing the dispatch to different SIMD ISA variants at runtime.</p> </li> <li> <p>Implementing cache-friendly traversal for larger matmuls and multi-threading   by interfacing with IREE's runtime 
dispatch.</p> </li> <li> <p>Improving the generated code by fusing the 4d tiled layout with the   producers and consumers of the <code>linalg.mmt4d</code>.</p> </li> </ul>","tags":["CPU"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/","title":"TFLite support via TOSA","text":"<p>IREE can now execute TensorFlow Lite (TFLite) models through the use of TOSA, an open standard of common tensor operations, and a part of MLIR core. TOSA\u2019s high-level representation of tensor operations provides a common front-end for ingesting models from different frameworks. In this case we ingest a TFLite FlatBuffer and compile it to TOSA IR, which IREE takes as an input format to compile to its various backends.</p> <p></p> <p>Using TFLite as a frontend for IREE provides an alternative ingestion method for already existing models that could benefit from IREE\u2019s design. This enables models already designed for on-device inference to have an alternative path for execution without requiring any additional porting, while benefiting from IREE\u2019s improvements in buffer management, work dispatch system, and compact binary format. With continued improvements to IREE/MLIR\u2019s compilation performance, more optimized versions can be compiled and distributed to target devices without an update to the clientside environment.</p> <p>Today, we have validated floating point support for a variety of models, including mobilenet (v1, v2, and v3) and mobilebert. More work is in progress to support fully quantized models, and TFLite\u2019s hybrid quantization, along with dynamic shape support.</p>","tags":["TensorFlow"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/#examples","title":"Examples","text":"<p>TFLite with IREE is available in Python and Java.  We have a colab notebook that shows how to use IREE\u2019s python bindings and TFLite compiler tools to compile a pre-trained TFLite model from a FlatBuffer and run using IREE.  
We also have an Android Java app that was forked from an existing TFLite demo app, swapping out the TFLite library for our own AAR.  More information on IREE\u2019s TFLite frontend is available here.</p>","tags":["TensorFlow"]},{"location":"developers/","title":"Developers","text":"<p>These pages cover topics useful for project maintainers and contributors.</p> <p>Caution</p> <p>Some of these pages may be stale. Contributions are always welcome!</p>"},{"location":"developers/usage-best-practices/","title":"Usage best practices","text":"<p>This page contains a list of best practices for getting the most out of IREE, spanning model authoring, ahead-of-time compilation, and runtime use. Treat these as a collection of ideas to consider or areas to start benchmarking when working on your own applications.</p>"},{"location":"developers/usage-best-practices/#introduction","title":"Introduction","text":"<p>Common themes include:</p> <ul> <li>Give the compiler as much information as possible</li> <li>Give the compiler opportunities to batch work together or defer computation</li> <li>Keep compute devices saturated with work through pipelining</li> <li>Use dense math where possible, particularly for inner loop bodies</li> <li>Limit synchronization points between devices like CPUs and GPUs</li> <li>Profile early and often, using the right tools for each level of granularity</li> </ul>"},{"location":"developers/usage-best-practices/#practices-for-model-authoring","title":"Practices for model authoring","text":""},{"location":"developers/usage-best-practices/#track-state-within-your-model-when-possible","title":"Track state within your model when possible","text":"<p>If your model is stateful prefer to store that state directly within your program rather than externalizing it through arguments and return values. 
By keeping state inside your program the compiler is better able to reason about it and function calls will have lower overhead.</p> <p>If you do externalize state, try to pack that state into a limited number of arguments.</p> <p>See the variables and state sample for further guidance on tracking and using state.</p>"},{"location":"developers/usage-best-practices/#limit-uses-of-dynamic-shapes","title":"Limit uses of dynamic shapes","text":"<p>While IREE aims to support general dynamic shapes use, it is better able to optimize parts of programs where shapes are static. Slow varying dimensions like batch index or timestamp are safer uses of dynamic shapes than faster varying dimensions like the x/y/channel dimensions of images.</p> <p>See the dynamic shapes sample for further guidance on using dynamic shapes.</p>"},{"location":"developers/usage-best-practices/#practices-for-compilation-settings","title":"Practices for compilation settings","text":"<p>TODO: which compiler targets to use (try both CUDA and Vulkan?)</p> <p>TODO: use the most specific LLVM target triple you can?</p>"},{"location":"developers/usage-best-practices/#tuning-compilation-heuristics","title":"Tuning compilation heuristics","text":"<p>IREE runs its own suite of benchmarks continuously using the definitions at https://github.com/iree-org/iree/tree/main/benchmarks. The flags set for these benchmarks represent the latest manually tuned values for workloads we track closely and referencing them may help with your own search for peak performance. 
You can use these flags in your own explorations, but note that as compiler performance matures, the existing flags will gradually be replaced with attributes for autotuning or command line options for experimental features.</p>"},{"location":"developers/usage-best-practices/#practices-for-runtime-use","title":"Practices for runtime use","text":"<p>TODO: sample code, profile numbers</p>"},{"location":"developers/usage-best-practices/#tuning-runtime-settings","title":"Tuning runtime settings","text":"<p>When running on the CPU, the task system flags specified in iree/task/api.c give control over how worker threads will be created. For example, the <code>--task_topology_group_count=3</code> flag can be set to explicitly run on three workers rather than rely on heuristic selection that defaults to one worker per detected physical core.</p> <p>If running on a single thread or system with no threading support the <code>local-sync</code> HAL driver can be used instead of the multithreaded <code>local-task</code> HAL driver to reduce dependencies and code size. 
When running with the <code>local-sync</code> driver all execution happens inline on the thread invoking the IREE runtime and will block until it has completed.</p>"},{"location":"developers/usage-best-practices/#do-the-minimum-amount-of-work-cache-queries-and-reuse-buffers","title":"Do the minimum amount of work: cache queries and reuse buffers","text":"<p>When using IREE's runtime libraries, try to front-load queries, particularly queries using strings that look up into maps like <code>iree_runtime_session_call_by_name</code>, so that hot sections of code are doing the minimum amount of work: routing inputs through buffers, scheduling runtime calls, and routing outputs through other buffers.</p>"},{"location":"developers/vulkan-environment-setup/","title":"Vulkan environment setup","text":"<p>Vulkan is a new generation graphics and compute API that provides high-efficiency, cross-platform access to modern GPUs used in a wide variety of devices from PCs and consoles to mobile phones and embedded platforms.</p> <p>This page lists steps and tips for setting up and troubleshooting a Vulkan development environment. The information here is meant to be generic.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-architecture","title":"Vulkan architecture","text":"<p>Vulkan adopts a layered architecture, which aims to better support extensibility. There are four components involved in this architecture:</p> <ul> <li>The Vulkan Application</li> <li>The Vulkan Loader</li> <li>Vulkan Layers</li> <li>Installable Client Drivers (ICDs)</li> </ul> <p></p> <p>The Vulkan loader sits between the Vulkan application, which calls Vulkan APIs, and the ICDs, which implement these Vulkan APIs. Vulkan layers augment the Vulkan system to provide optional features like validation and debugging. 
The Vulkan loader composes a chain of requested layers, which processes the Vulkan application's API calls one by one, and finally redirects the API calls made by the Vulkan application to one or more ICDs.</p> <p>It's highly recommended to read the Architecture of the Vulkan Loader Interfaces Overview to get a general understanding of what these components are and how they interact with one another.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-setup_1","title":"Vulkan environment setup","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#windows","title":"Windows","text":"<p>You need to install the Vulkan SDK from LunarG to get the Vulkan loader.</p> <p>Typically the Vulkan SDK will be installed at <code>C:\\VulkanSDK\\&lt;version&gt;\\</code> and there will be an environment variable <code>VULKAN_SDK</code> pointing to it. You can run the <code>vulkancube</code> executable under the <code>Bin\\</code> subdirectory of the Vulkan SDK to make sure everything works properly. 
If not, you probably need to check whether the graphics card is Vulkan capable or update the driver.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#debianubuntu","title":"Debian/Ubuntu","text":"<p>For Ubuntu 20.04/22.04, it's recommended to directly install the full Vulkan SDK from LunarG's APT sources for the loader and various developer tools.</p> <p>If you want to have a minimal environment, the following packages should be installed for a proper Vulkan runtime:</p> <ul> <li><code>libvulkan1</code> for the Vulkan loader <code>libvulkan.so</code>.</li> <li>For AMD GPUs, you can install<ul> <li><code>mesa-vulkan-drivers</code> for the Mesa AMD Vulkan ICD, or</li> <li>AMD's official Vulkan ICD.</li> </ul> </li> <li>For NVIDIA GPUs, you can install<ul> <li><code>nvidia-vulkan-icd</code> on Debian for NVIDIA Vulkan ICD.</li> <li>the most recent <code>nvidia-driver-*</code> package on Ubuntu for NVIDIA Vulkan ICD.</li> </ul> </li> </ul> <p>The above packages provide the Vulkan loader and ICDs. With them a Vulkan application should be able to run. You may additionally want to install</p> <ul> <li>vulkan-tools for command-line tools like <code>vulkaninfo</code>   (dumping available ICDs and their capabilities) and GUI application like   <code>vulkancube</code> (rendering a rotating cube).</li> </ul> <p>In order to develop Vulkan applications, you additionally need the following packages:</p> <ul> <li>libvulkan-dev for various Vulkan header files.</li> <li>vulkan-validationlayers for Vulkan validation     layers like <code>VkLayer_standard_validation</code>.</li> </ul>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#linux","title":"Linux","text":"<p>For other Linux distros, please consult the corresponding package management tools for the packages needed. (And please feel free to update this doc regarding them.)</p> <p>You can also download and install the Vulkan SDK tarball from LunarG. 
It packages the loader with many useful layers and other shader tools.</p> <p>You can also build the Vulkan SDK component projects like Vulkan-Loader and Vulkan-ValidationLayers from source. But note that building these components separately you need to make sure they are consistent with one another (e.g., using the same version of Vulkan headers) to function together.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#android","title":"Android","text":"<p>Please make sure your Android device is Vulkan capable. Vulkan is supported on Android since 7, but we track newer Android versions (10+) closely and haven't set a clear min version yet.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#multiple-vulkan-sdks","title":"Multiple Vulkan SDKs","text":"<p>If you have multiple versions of Vulkan loaders exist, you may also need to set <code>LD_LIBRARY_PATH</code> and <code>LD_PRELOAD</code> to load the desired version of the loader. For example:</p> <pre><code>LD_LIBRARY_PATH={PATH_TO_VULKAN_SDK}/x86_64/lib/\nLD_PRELOAD=libvulkan.so.1\n</code></pre> <p>This can also be done by sourcing the proper <code>setup-env.sh</code> from one of the downloaded Vulkan SDKs.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-troubleshooting","title":"Vulkan environment troubleshooting","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#useful-environment-variables","title":"Useful environment variables","text":"<p>There are a few environment variables that can alter the default Vulkan loader behavior and print verbose information, notably:</p> <ul> <li><code>VK_LOADER_DEBUG</code>: enable loader debug messages. Setting it to <code>all</code> will     enable the most verbose logging from the loader. 
This is especially useful     when trying to see what layers/ICDs are searched and used.</li> <li><code>VK_ICD_FILENAMES</code>: force the loader to use a specific ICD. This is     especially useful when you have multiple Vulkan capable devices and want to     select which one to use manually.</li> <li><code>VK_INSTANCE_LAYERS</code>: force the loader to enable the given layers. For     example, You can force enable <code>VK_LAYER_LUNARG_api_dump</code> to have a detailed     dump of all Vulkan API calls made by the application. You can force enable     <code>VK_LAYER_LUNARG_core_validation</code> to validate the API calls made by the     application.</li> <li><code>VK_LAYER_PATH</code>: override the loader's standard layer library search folders.</li> </ul> <p>Please see the Vulkan loader's documentation for detailed explanation for these variables.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#setting-environment-variables-for-bazel-test","title":"Setting environment variables for Bazel test","text":"<p>Bazel runs tests in a sandbox and environment variables must be passed through to the test runner. Consider putting environment setup in a <code>user.bazelrc</code> to save typing. For example:</p> <pre><code>test --test_env=\"LD_LIBRARY_PATH=/absolute/path/to/vulkan/sdk/x86_64/lib/\"\ntest --test_env=\"LD_PRELOAD=libvulkan.so.1\"\ntest --test_env=\"VK_LAYER_PATH=/absolute/path/to/additional/layers/:$VK_LAYER_PATH\"\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-function-vkcreateinstance-not-available-on-android","title":"Vulkan function <code>vkCreateInstance</code> not available on Android","text":"<p>Since Android 8 Oreo, Android re-architected the OS framework with project Treble. Framework libraries and vendor libraries have a more strict and clear separation. Their dependencies are carefully scrutinized and only selected cases are allowed. 
This is enforced with linker namespaces.</p> <p><code>/data/local/tmp</code> is the preferred directory for automating native binary tests built using NDK toolchain. They should be allowed to access libraries like <code>libvulkan.so</code> for their functionality. However, there was an issue with fully treblized Android 10 where <code>/data/local/tmp</code> did not have access to the linker namespaces needed by <code>libvulkan.so</code>. This should be fixed now. But as typically in the Android system, it takes a long time to see the fix getting propagated, if ever.</p> <p>A known workaround is to symlink the vendor Vulkan implementation under <code>/vendor/lib[64]</code> as <code>libvulkan.so</code> under <code>/data/local/tmp</code> and use <code>LD_LIBRARY_PATH=/data/local/tmp</code> when invoking IREE executables.</p> <p>For Qualcomm Adreno GPUs, the vendor Vulkan implementation is at <code>/vendor/lib[64]/hw/vulkan.*.so</code>. So for example for Snapdragon 865:</p> <pre><code>adb shell ln -s /vendor/lib64/hw/vulkan.kona.so /data/local/tmp/libvulkan.so\n</code></pre> <p>For ARM Mali GPUs, there is only one monolithic driver (<code>/vendor/lib[64]/libGLES_mali.so</code>) for OpenGL and Vulkan and the Vulkan vendor driver (<code>/vendor/lib[64]/hw/vulkan.*.so</code>) is just a symlink to it. So for example:</p> <pre><code>adb shell ln -s /vendor/lib64/libGLES_mali.so /data/local/tmp/libvulkan.so\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#ssh-on-linux-and-x-forwarding","title":"SSH on Linux and X forwarding","text":"<p>Physical devices enumerated on NVIDIA drivers can be affected by the <code>DISPLAY</code> environment variable. 
If you are running under an SSH session to Linux or using chrome remote desktop and have problems with physical device enumeration, you probably want to check the <code>DISPLAY</code> environment and set it to point to a display at the server side, for example:</p> <pre><code>export DISPLAY=:0\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/building/bazel/","title":"Building with Bazel","text":"<p>This page walks through building IREE from source using the Bazel build system.</p> <p>Warning</p> <p>Bazel build support is primarily for internal project infrastructure. We strongly recommend using CMake instead.</p> <p>Our Bazel configuration is also only tested on Linux. Windows and macOS may be unstable.</p>"},{"location":"developers/building/bazel/#prerequisites","title":"Prerequisites","text":"Linux macOS Windows <ol> <li> <p>Install Bazel, matching IREE's     <code>.bazelversion</code>     by following the     official docs.</p> </li> <li> <p>Install a compiler such as Clang (GCC is not fully supported).</p> <pre><code>sudo apt install clang\n</code></pre> <p>Set environment variables for Bazel:</p> <pre><code>export CC=clang\nexport CXX=clang++\n</code></pre> </li> <li> <p>Install Python build requirements:</p> <pre><code>python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre> </li> </ol> <ol> <li> <p>Install Homebrew:</p> <pre><code>/bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)\"\n</code></pre> </li> <li> <p>Install Bazel, matching IREE's     <code>.bazelversion</code>     by following the official docs or     via Homebrew:</p> <pre><code>brew install bazel\n</code></pre> </li> <li> <p>Install Python build requirements:</p> <pre><code>python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre> </li> </ol> <p>Tip</p> <p>You can simplify installation by using a package manager like Scoop or Chocolatey.</p> <ol> 
<li> <p>Install Bazel, matching IREE's     <code>.bazelversion</code>     by following the official docs.</p> <p>Also install MSYS2 by following Bazel's documentation.</p> </li> <li> <p>Install Python3 (docs here)     and Python build requirements:</p> <pre><code>python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre> </li> <li> <p>Install the full Visual Studio or \"Build Tools For Visual Studio\" from the     downloads page then     set the <code>BAZEL_VS</code> environment variable:</p> <pre><code>&gt; $env:BAZEL_VS = \"C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\"\n</code></pre> </li> </ol>"},{"location":"developers/building/bazel/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":""},{"location":"developers/building/bazel/#clone","title":"Clone","text":"<p>Use Git to clone the IREE repository and initialize its submodules:</p> <pre><code>git clone https://github.com/iree-org/iree.git\ncd iree\ngit submodule update --init\n</code></pre> <p>Configure Bazel:</p> <pre><code># This generates a `configured.bazelrc` file by analyzing your environment.\n# Skipping this step will make it difficult to select your platform/compiler.\npython3 configure_bazel.py\n</code></pre>  Linux macOS Windows <p>(No Linux-specific tips for configuring)</p> <p>(No macOS-specific tips for configuring)</p> <p>Tip</p> <p>Clone to a short path like <code>C:\\projects\\</code> to avoid issues with Windows maximum path lengths (260 characters).</p> <p>Tip</p> <p><code>configure_bazel.py</code> only detects that you have Windows and will output the default <code>--config=windows</code> to <code>configured.bazelrc</code>, which assumes the latest version of MSVC. 
To avoid some warnings, you may want to replace it with (for example) <code>--config=msvc2022</code>.</p>"},{"location":"developers/building/bazel/#build","title":"Build","text":"<p>Run all tests:</p> <pre><code>bazel test -k //...\n</code></pre> <p>Run all tests except those that require CUDA:</p> <pre><code>bazel test -k //... \\\n    --iree_drivers=local-sync,local-task,vulkan \\\n    --test_tag_filters=\"-driver=cuda,-target=cuda\" \\\n    --build_tag_filters=\"-driver=cuda,-target=cuda\"\n</code></pre> <p>Run all tests except those that require a GPU (any API):</p> <pre><code>bazel test -k //... \\\n    --iree_drivers=local-sync,local-task,vulkan \\\n    --test_tag_filters=\"-driver=vulkan,-driver=metal,-driver=cuda,-target=cuda\" \\\n    --build_tag_filters=\"-driver=cuda,-target=cuda\"\n</code></pre> <p>Tip</p> <p>See the <code>build_tools/bazel/build_test_all.sh</code> script for examples of other flags and environment variables that can be used to configure what Bazel runs.</p> <p>In general, build artifacts will be under the <code>bazel-bin</code> directory at the top level.</p>"},{"location":"developers/building/bazel/#recommended-userbazelrc","title":"Recommended <code>user.bazelrc</code>","text":"<p>You can put a user.bazelrc at the root of the repository and it will be ignored by git.</p>  Linux macOS Windows <pre><code>build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. 
This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n</code></pre> <pre><code>build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n</code></pre> <pre><code>build --disk_cache=c:/bazelcache\nbuild:debug --compilation_mode=dbg --copt=/O2 --per_file_copt=iree@/Od --strip=never\n</code></pre>"},{"location":"developers/building/bazel/#whats-next","title":"What's next?","text":""},{"location":"developers/building/bazel/#take-a-look-around","title":"Take a Look Around","text":"<p>Build all of IREE's 'tools' directory:</p> <pre><code>bazel build tools/...\n</code></pre> <p>Check out what was built:</p> <pre><code>ls bazel-bin/tools/\n./bazel-bin/tools/iree-compile --help\n</code></pre> <p>Translate a MLIR file and execute a function in the compiled module:</p> <pre><code># iree-run-mlir &lt;compiler flags&gt; [input.mlir] &lt;runtime flags&gt;\n$ ./bazel-bin/tools/iree-run-mlir \\\n  --iree-hal-target-backends=vmvx --print-mlir \\\n  ./samples/models/simple_abs.mlir \\\n  --input=f32=-2\n</code></pre>"},{"location":"developers/building/cmake-options/","title":"CMake options","text":""},{"location":"developers/building/cmake-options/#frequently-used-cmake-options","title":"Frequently-used CMake options","text":""},{"location":"developers/building/cmake-options/#cmake_build_type","title":"<code>CMAKE_BUILD_TYPE</code>","text":"<ul> <li>type: STRING</li> </ul> <p>Sets the build type. 
Possible values are <code>Release</code>, <code>Debug</code>, <code>RelWithDebInfo</code> and <code>MinSizeRel</code>. If unset, build type is set to <code>Release</code>.</p>"},{"location":"developers/building/cmake-options/#cmake_lang_compiler","title":"<code>CMAKE_&lt;LANG&gt;_COMPILER</code>","text":"<ul> <li>type: STRING</li> </ul> <p>This is the command that will be used as the <code>&lt;LANG&gt;</code> compiler, which are <code>C</code> and <code>CXX</code> in IREE. These variables are set to compile IREE with <code>clang</code> or rather <code>clang++</code>. Once set, these variables can not be changed.</p>"},{"location":"developers/building/cmake-options/#iree-specific-cmake-options","title":"IREE-specific CMake options","text":"<p>This gives a brief explanation of IREE specific CMake options and variables.</p>"},{"location":"developers/building/cmake-options/#iree_enable_runtime_tracing","title":"<code>IREE_ENABLE_RUNTIME_TRACING</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enables instrumented runtime tracing. Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_enable_compiler_tracing","title":"<code>IREE_ENABLE_COMPILER_TRACING</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enables instrumented compiler tracing. This requires that <code>IREE_ENABLE_RUNTIME_TRACING</code> also be set. Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_compiler","title":"<code>IREE_BUILD_COMPILER</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE compiler. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_tests","title":"<code>IREE_BUILD_TESTS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds IREE unit tests. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_docs","title":"<code>IREE_BUILD_DOCS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds IREE documentation files. 
Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_samples","title":"<code>IREE_BUILD_SAMPLES</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds IREE sample projects. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_python_bindings","title":"<code>IREE_BUILD_PYTHON_BINDINGS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE python bindings. Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite","title":"<code>IREE_BUILD_BINDINGS_TFLITE</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE TFLite C API compatibility shim. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite_java","title":"<code>IREE_BUILD_BINDINGS_TFLITE_JAVA</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE TFLite Java bindings with the C API compatibility shim. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_experimental_remoting","title":"<code>IREE_BUILD_EXPERIMENTAL_REMOTING</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds experimental remoting component. 
Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_hal_driver_defaults","title":"<code>IREE_HAL_DRIVER_DEFAULTS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Default setting for each <code>IREE_HAL_DRIVER_*</code> option.</p>"},{"location":"developers/building/cmake-options/#iree_hal_driver_","title":"<code>IREE_HAL_DRIVER_*</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Individual options enabling the build for each runtime HAL driver.</p>"},{"location":"developers/building/cmake-options/#iree_target_backend_defaults","title":"<code>IREE_TARGET_BACKEND_DEFAULTS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Default setting for each <code>IREE_TARGET_BACKEND_*</code> option.</p>"},{"location":"developers/building/cmake-options/#iree_target_backend_","title":"<code>IREE_TARGET_BACKEND_*</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Individual options enabling the build for each compiler target backend.</p>"},{"location":"developers/building/cmake-options/#iree_input_","title":"<code>IREE_INPUT_*</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Individual options enabling each set of input dialects.</p>"},{"location":"developers/building/cmake-options/#iree_output_format_c","title":"<code>IREE_OUTPUT_FORMAT_C</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enables the vm-c compiler output format, using MLIR EmitC. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_dev_mode","title":"<code>IREE_DEV_MODE</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Configure settings to optimize for IREE development (as opposed to CI or release). Defaults to <code>OFF</code>. For example, this will downgrade some compiler diagnostics from errors to warnings.</p>"},{"location":"developers/building/cmake-options/#iree_enable_lld","title":"<code>IREE_ENABLE_LLD</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Use lld when linking. Defaults to <code>OFF</code>. 
This option is equivalent to <code>-DIREE_USE_LINKER=lld</code>. The options <code>IREE_ENABLE_LLD</code> and <code>IREE_USE_LINKER</code> cannot be set at the same time.</p>"},{"location":"developers/building/cmake-options/#iree_enable_asan","title":"<code>IREE_ENABLE_ASAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable address sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#iree_enable_msan","title":"<code>IREE_ENABLE_MSAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable memory sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#iree_enable_tsan","title":"<code>IREE_ENABLE_TSAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable thread sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#iree_enable_ubsan","title":"<code>IREE_ENABLE_UBSAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable undefined behavior sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#cross-compilation","title":"Cross-compilation","text":"<p>When cross compiling (using a toolchain file like <code>android.toolchain.cmake</code>), first build and install IREE's tools for your host configuration, then use the <code>IREE_HOST_BIN_DIR</code> CMake option to point the cross compiled build at the host tools.</p>"},{"location":"developers/building/cmake-with-ccache/","title":"CMake with <code>ccache</code>","text":"<p><code>ccache</code> is a compilation cache. In principle, just prepending compiler invocations with <code>ccache</code> is all one needs to enable it, e.g.</p> <pre><code>ccache clang foo.c -c -o foo.o\n</code></pre> <p>takes care of executing <code>clang</code> with these arguments and caches the output file <code>foo.o</code>. 
The next invocation then skips executing <code>clang</code> altogether.</p> <p>When the cache is hit, the speedup is such that the \"compilation\" becomes essentially free. However, <code>ccache</code> only caches compilation, not linking.</p> <p>Here are a few scenarios where <code>ccache</code> helps:</p> <ul> <li>Incremental rebuilds. While <code>cmake</code> always tries to avoid unnecessary work in   incremental rebuilds, it can only make simple decisions based on file   timestamps. <code>ccache</code> sees deeper: if the raw source code isn't readily   a cache hit, it will then try again after preprocessing and discarding   comments.</li> <li>One pain point with <code>cmake</code> is having to start over from a clean build   directory from time to time, which by default means paying again the full cost   of a cold build. Thankfully <code>ccache</code> keeps its cache outside of any <code>cmake</code>   build directory, so the first build in the new clean build directory may be   very fast.</li> </ul>"},{"location":"developers/building/cmake-with-ccache/#installing-and-setting-up-ccache","title":"Installing and setting up <code>ccache</code>","text":"<p><code>ccache</code> is available on most platforms. On Debian-based Linux distributions, do:</p> <pre><code>sudo apt install ccache\n</code></pre> <p>The one <code>ccache</code> setting that you probably need to configure is the maximum cache size. The default <code>5G</code> is too small for our purposes. To set the cache max size, do this once:</p> <pre><code>ccache --max-size=20G\n</code></pre> <p>Tip: At the moment (late 2020), most of the code we're building is <code>third_party/llvm-project</code> so the fundamental limiting factor to how far we can cache away rebuilds is how often that dependency gets updated. 
Given how frequently it currently is updated, I'm finding that <code>20G</code> is enough to make the <code>ccache</code> size not be the limiting factor.</p>"},{"location":"developers/building/cmake-with-ccache/#telling-cmake-to-use-ccache","title":"Telling CMake to use <code>ccache</code>","text":"<p>Use the CMake COMPILER_LAUNCHER functionality by setting <code>CMAKE_C_COMPILER_LAUNCHER=ccache</code> and <code>CMAKE_CXX_COMPILER_LAUNCHER=ccache</code> in your CMake configure command.</p> <p>Notes:</p> <ul> <li>This approach only works with the <code>Ninja</code> and <code>Makefile</code> generators   (<code>cmake -G</code> flag). When using other generators, another approach is needed,   based on wrapping the compiler in a script that prepends <code>ccache</code>. See this   article.</li> </ul>"},{"location":"developers/building/cmake-with-ccache/#ensuring-that-ccache-is-used-and-monitoring-cache-hits","title":"Ensuring that <code>ccache</code> is used and monitoring cache hits","text":"<p>The <code>ccache -s</code> command dumps statistics, including a cache hit count and ratio. It's convenient to run periodically with <code>watch</code> in a separate terminal:</p> <pre><code>watch -n 0.1 ccache -s  # update the stats readout every 0.1 seconds\n</code></pre>"},{"location":"developers/building/emscripten/","title":"Building with Emscripten","text":"<p>Emscripten is a complete compiler toolchain to WebAssembly, using LLVM, with a special focus on speed, size, and the Web platform. Emscripten can be used to compile parts of IREE to WebAssembly for execution within web browsers or other Wasm runtimes.</p>","tags":["Web"]},{"location":"developers/building/emscripten/#status","title":"Status","text":"<p>IREE's runtime can be compiled through Emscripten in some limited configurations. More of the runtime will be supported over time.</p> <p>IREE's compiler can be compiled through Emscripten with local changes. 
More work is needed for this to be generally supported.</p>","tags":["Web"]},{"location":"developers/building/emscripten/#prerequisites","title":"Prerequisites","text":"<p>Read https://emscripten.org/docs/getting_started/downloads.html and run</p> <pre><code>./emsdk install latest\n./emsdk activate latest\nsource ./emsdk_env.sh\n</code></pre>","tags":["Web"]},{"location":"developers/building/emscripten/#building-irees-runtime-with-emscripten","title":"Building IREE's runtime with Emscripten","text":"","tags":["Web"]},{"location":"developers/building/emscripten/#host-configuration","title":"Host configuration","text":"<p>Build and install at least the compiler tools on your host machine, or install them from a binary distribution:</p> <pre><code>$ cmake -G Ninja -B ../iree-build-host/ \\\n    -DCMAKE_C_COMPILER=clang \\\n    -DCMAKE_CXX_COMPILER=clang++ \\\n    -DCMAKE_INSTALL_PREFIX=../iree-build-host/install \\\n    .\n$ cmake --build ../iree-build-host/ --target install\n</code></pre>","tags":["Web"]},{"location":"developers/building/emscripten/#target-configuration","title":"Target configuration","text":"<pre><code>$ emcmake cmake -G Ninja -B ../iree-build-emscripten/ \\\n  -DCMAKE_BUILD_TYPE=Release \\\n  -DIREE_HOST_BIN_DIR=$(realpath ../iree-build-host/install/bin) \\\n  -DIREE_BUILD_TESTS=OFF \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\n</code></pre> <p>Build:</p> <pre><code>cmake --build ../iree-build-emscripten/ \\\n  --target iree_samples_simple_embedding_simple_embedding_vmvx_sync\n</code></pre>","tags":["Web"]},{"location":"developers/building/emscripten/#load-into-a-webassembly-environment","title":"Load into a WebAssembly environment","text":"<p>Copy the outputs from the build process (e.g. 
<code>simple_embedding_vmvx_sync.js</code> and <code>simple_embedding_vmvx_sync.wasm</code>) into your application and follow instructions at either https://webassembly.org/getting-started/developers-guide/ or https://developer.mozilla.org/en-US/docs/WebAssembly/Loading_and_running.</p>","tags":["Web"]},{"location":"developers/debugging/android-with-lldb/","title":"Android LLDB debugging","text":"<p>This doc shows how to use LLDB to debug native binaries on Android. For a more complete explanation, see the official LLDB documentation on remote debugging.</p>","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#prerequisites","title":"Prerequisites","text":"<p>We assume the following setup:</p> <ol> <li>Android NDK is installed and    the <code>ANDROID_NDK</code> environment variable is set to the installation path.</li> <li>Your Android device connected and configured for    <code>adb</code>.</li> <li>The Android binary of interest is already compiled and the command to run it    (in <code>adb shell</code>) is <code>&lt;your-binary&gt; [program args...]</code>. This does not have    to be a proper Android app with a manifest, etc.</li> </ol>","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#running-manually","title":"Running Manually","text":"<ol> <li> <p>Push the toolchain files, including <code>lldb-server</code>, to your device:</p> <pre><code>adb shell \"mkdir -p /data/local/tmp/tools\"\nadb push \"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/aarch64/* /data/local/tmp/tools\n</code></pre> <p>You may need to adjust the clang toolchain version to match the one in your NDK. You can find it with <code>find \"$ANDROID_NDK/toolchains/llvm/prebuilt\" -name lldb-server</code>.</p> </li> <li> <p>Set up port forwarding. 
We are going to use port 5039 but you are free to    pick a different one:</p> <pre><code>adb forward tcp:5039 tcp:5039\n</code></pre> </li> <li> <p>Start an <code>lldb-server</code> in a new interactive adb shell:</p> <pre><code>adb shell\n/data/local/tmp/tools/lldb-server platform --listen '*:5039' --server\n</code></pre> </li> <li> <p>Launch <code>lldb</code>, connect to the server and run the binary:</p> <pre><code>lldb -o 'platform select remote-android' \\\n    -o 'platform connect connect://:5039' \\\n    -o 'platform shell cd /data/local/tmp'\ntarget create &lt;your-binary&gt;\nrun [program args...]\n</code></pre> <p>You can either use the system <code>lldb</code> or a prebuilt under <code>\"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/&lt;your-host-arch&gt;</code>.</p> <p>Explanation: each <code>-o</code> (short for <code>--one-shot</code>) tells lldb to execute a command on startup. You can run those manually in the lldb shell, if you prefer. Then, we tell lldb which working directory to use, where to find the executable, and what command line arguments to use.</p> </li> </ol>","tags":["Android"]},{"location":"developers/debugging/compile-time-regressions/","title":"Compile time regression debugging","text":"<p>So the IREE compiler used to compile a program quickly, but it is now slower. What do you do?</p>"},{"location":"developers/debugging/compile-time-regressions/#initial-information-gathering","title":"Initial information gathering","text":"<p>Try to answer as many of these questions as you can:</p> <p>When did compilation get slower?</p> <p>A specific git commit is ideal, but \"sometime in the last week\" is a good   starting point. You'll ultimately want to find a culprit release or git   commit that changed the compiler code.</p> <p>How much slower did compilation get?</p> <p>Be specific - did it jump from 1 minute to 2 minutes, or 1 minute to 1 hour?   
Identifying the scale of the regression can help set the priority to   investigate it.</p> <p>What is the full compile command?</p> <p>Try to extract the input program and full list of flags passed to the   compiler binary so that others can reproduce what you're seeing. Try to   distill this as much as possible to using just native tools (no Python or   other framework layers).</p> <p>What environment is the compiler running in?</p> <p>Are you using a <code>Debug</code> build, or a release build? What operating system and   size machine is running the compiler (e.g. Linux developer machine, or a   smaller system)?</p>"},{"location":"developers/debugging/compile-time-regressions/#culprit-finding-and-bisecting","title":"Culprit finding and bisecting","text":"<p>If you only have a rough idea of when something changed and want to narrow that down to a specific code change, bisecting can help.</p>"},{"location":"developers/debugging/compile-time-regressions/#running-git-bisect","title":"Running <code>git bisect</code>","text":"<p>Building the compiler from source and using <code>git bisect</code> will let you pinpoint specific commits in IREE, though it typically won't let you step through changes in submodules (e.g. MLIR updates in <code>third_party/llvm-project/</code>).</p> <p>Tip: Configure ccache if you'll be rebuilding the compiler while bisecting</p> <p>A manual workflow with <code>git bisect</code> looks like this:</p> <pre><code>git bisect start --first-parent\ngit bisect good [&lt;rev&gt;]\ngit bisect bad [&lt;rev&gt;]\n\n# Read the prompts from the command as it runs\n# At each step, test the compiler:\n#   git submodule update\n#   cmake --build build/ --target iree-compile\n#   ./build/tools/iree-compile &lt;args&gt;\n#       attach Tracy, observe timing, print IR, etc. 
to determine if fast or slow\n#       if fast, `git bisect good`\n#       if slow, `git bisect bad`\n#   repeat\n</code></pre> <p>An automated workflow can use <code>git bisect run</code> and a script:</p> <pre><code># run_bisect.sh\ngit submodule update\ncmake --build build/ --target iree-compile\n# Other logic here\n</code></pre> <pre><code>git bisect start --first-parent\ngit bisect good [&lt;rev&gt;]\ngit bisect bad [&lt;rev&gt;]\ngit bisect run run_bisect.sh\n</code></pre>"},{"location":"developers/debugging/compile-time-regressions/#sample-compile-executable-sources-individually-with-a-timeout","title":"Sample: compile executable sources individually with a timeout","text":"<pre><code>#!/bin/bash\n\nset -xeuo pipefail\n\n# --------------------------------------------------------------------------- #\n# Settings                                                                    #\n# --------------------------------------------------------------------------- #\n\nINPUT_FILE_PATH=\"/path/to/program.mlirbc\"\nTMP_DIR=\"../iree-tmp\"\n\ndeclare -a COMPILER_FLAGS=(\n  \"--iree-input-type=stablehlo\"\n  \"--iree-hal-target-backends=cuda\"\n  \"--iree-hal-cuda-llvm-target-arch=sm_80\"\n)\n\nTIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE=10\n\n# --------------------------------------------------------------------------- #\n# Utility functions                                                           #\n# --------------------------------------------------------------------------- #\n\n# Call to have `git bisect` skip this commit (don't mark as good _or_ bad)\n# https://git-scm.com/docs/git-bisect#_bisect_run\nskip_on_error() {\n  &gt;&amp;2 echo \"** Skipping due to error: $1 **\"\n  exit 125  # Special exit code for `git bisect skip`\n}\n\n# --------------------------------------------------------------------------- #\n# Main script                                                                 #\n# 
--------------------------------------------------------------------------- #\n\n# Store git version hash, so we can dump artifacts to unique directories later.\nGIT_SHA=\"$(git rev-parse --short HEAD)\"\n\necho \"** Building iree-compile at ${GIT_SHA} **\"\n\n# The `git bisect` command only checks out a commit, so update submodules.\ngit submodule update\n\n# Build the compiler. You'll want ccache configured to make this fast!\ncmake --build ../iree-build/ --target iree-compile || skip_on_error \"CMake build failed\"\n\n# Run the compiler, dumping executable sources and stopping.\nSOURCES_DIR=\"${TMP_DIR}/sources-${GIT_SHA}\"\necho \"** Running iree-compile at ${GIT_SHA}, dumping sources to ${SOURCES_DIR} **\"\n../iree-build/tools/iree-compile \\\n    ${INPUT_FILE_PATH} \\\n    ${COMPILER_FLAGS[@]} \\\n    --iree-hal-dump-executable-sources-to=${SOURCES_DIR} \\\n    --compile-to=executable-sources \\\n    -o /dev/null\n\n# Run the compiler again on each executable individually.\necho \"** Running iree-compile at ${GIT_SHA} for each executable source **\"\nSOURCES=($(ls -1 ${SOURCES_DIR}))\nfor SOURCE in \"${SOURCES[@]}\"; do\n  echo \"  * Compiling: ${SOURCE} *\"\n  timeout --verbose ${TIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE} \\\n   ../iree-build/tools/iree-compile ${SOURCES_DIR}/${SOURCE} \\\n    ${COMPILER_FLAGS[@]} \\\n    --compile-mode=hal-executable \\\n    -o /dev/null\ndone\n</code></pre>"},{"location":"developers/debugging/compile-time-regressions/#profiling-and-tracing","title":"Profiling and tracing","text":"<p>If you want to understand why the compiler is fast or slow, or if you want to compare performance in detail between two versions, consider these profiling options.</p>"},{"location":"developers/debugging/compile-time-regressions/#mlir-pass-timing","title":"MLIR pass timing","text":"<p>The <code>-mlir-timing</code> flag enables Pass Timing instrumentation. 
Once the compiler finishes running, this prints a report like</p> <pre><code>===-------------------------------------------------------------------------===\n                      ... Pass execution timing report ...\n===-------------------------------------------------------------------------===\n  Total Execution Time: 0.0203 seconds\n\n   ---Wall Time---  --- Name ---\n   0.0047 ( 55.9%)  Canonicalizer\n   0.0019 ( 22.2%)  VerifierPass\n   0.0016 ( 18.5%)  LLVMLoweringPass\n   0.0003 (  3.4%)  CSE\n   0.0002 (  1.9%)  (A) DominanceInfo\n   0.0084 (100.0%)  Total\n</code></pre> <p>This is easy data to collect, especially remotely over SSH, but it might not paint a complete picture and requires waiting for compilation to finish.</p>"},{"location":"developers/debugging/compile-time-regressions/#using-tracy","title":"Using Tracy","text":"<p>See our documentation on profiling with Tracy, in particular the section on tracing <code>iree-compile</code>. For compile time regressions, pay particular attention to the compilation phases (Flow/Stream/HAL), how many times <code>TranslateExecutablesPass</code> runs, and if there are outlier passes that take significantly longer to run than others.</p> <p>Here are some previous analyses for inspiration:</p> <ul> <li>https://github.com/iree-org/iree/issues/12033</li> <li>https://github.com/iree-org/iree/issues/12035</li> <li>https://github.com/iree-org/iree/issues/12183</li> <li>https://github.com/iree-org/iree/issues/13189</li> </ul> <p>Example slow trace:</p> <p></p> <p>Example fast trace:</p> <p></p> <p>Example sampling statistics showing 10s of minutes in LLVM codegen:</p> <p></p>"},{"location":"developers/debugging/compile-time-regressions/#using-perf-and-pprof","title":"Using <code>perf</code> and <code>pprof</code>","text":"<p>These linux tools allow for fine-grained profiling. 
Below we present a list of steps to profile <code>iree-compile</code> and visualize the results as a flame graph.</p> <ol> <li> <p>Compile IREE tools with debug information (line tables at minimum) and frame    pointers. You can do that by selecting the <code>RelWithDebInfo</code> build type and    adding <code>-fno-omit-frame-pointer</code> to your compiler flags:</p> <pre><code>cmake &lt;Your-CMAKE-Flags&gt; \\\n   -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n   -DCMAKE_CXX_FLAGS=\"-fno-omit-frame-pointer\" \\\n   -DCMAKE_C_FLAGS=\"-fno-omit-frame-pointer\"\n</code></pre> </li> <li> <p>Set perf event scope/access to the appropriate level with    <code>perf_event_paranoid</code>.</p> <pre><code>echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid\n</code></pre> </li> <li> <p>Run <code>iree-compile</code> under the <code>perf</code> profiler and collect profile data. This    requires <code>sudo</code>.</p> <pre><code>sudo perf record -F 999 -g -- tools/iree-compile &lt;Your-Compile-Arguments&gt;\nsudo chown \"$USER:$USER\" perf.data\n</code></pre> </li> <li> <p>Use <code>pprof</code> to process <code>perf.data</code> from the previous step and start a local    http server with the visualized profile. See the    <code>pprof</code>'s README for installation    instructions and make sure to build    <code>perf_data_converter</code> and    add it to your <code>PATH</code>.</p> <pre><code>pprof -http ':' perf.data\n</code></pre> </li> </ol>"},{"location":"developers/debugging/compile-time-regressions/#stepping-through-compiler-ir","title":"Stepping through compiler IR","text":"<p>Debugging an MLIR-based compiler like IREE usually involves reading IR at some point. 
For compile time regressions, it helps to snapshot the IR at a few key phases and look for differences between fast compilation and slow compilation.</p> <p>Here is one useful flag combination:</p> <pre><code>--mlir-disable-threading \\\n--mlir-elide-elementsattrs-if-larger=8 \\\n--mlir-print-ir-after=iree-hal-materialize-interfaces\n</code></pre>"},{"location":"developers/debugging/gpu/","title":"GPU debugging playbook","text":"<p>This page aims to provide general approaches and practical tips for debugging GPU compiler/runtime correctness/performance issues in IREE.</p> <p>GPUs fundamentally have similar architectures and software stacks. We target GPUs from various vendors using different GPU APIs, but they share quite a lot of common infrastructure in IREE. So the approaches and tips here should be widely applicable.</p> <p>For the ones that are specific to a particular kind of problem/component/GPU, they are prefixed with proper icons to be clear. Here is what those icons represent--</p> Icon Category Correctness Performance AMD HIP/ROCm Apple Metal Microsoft DirectX NVIDIA CUDA Vulkan","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#general-methodology","title":"General methodology","text":"<p>The difficulties associated with debugging typically arise from isolating the problematic component and pinpointing the culprit. Once done, the solution typically derives naturally.</p> <p>There are many components in the IREE stack; hierarchically we can categorize them into either the compiler or runtime bucket:</p> <ul> <li>For compilers, there are multiple layers from the top to the bottom--frontend   input importers, IREE flow/stream compilation, IREE host/device compilation,   GPU backend in LLVM proper or GPU driver compiler for SPIR-V.</li> <li>For runtime, we have fewer layers--IREE HAL drivers, and GPU driver.</li> </ul> <p>Any of the above components/layers can have bugs. 
It's important to reduce the potential surface area to make the problem more tractable.</p> <p>Once we have a more isolated case, the general methodology to pinpoint the exact culprit is to</p> <ol> <li>collect and inspect the symptoms,</li> <li>form a hypothesis and run experiments to prove/refute the hypothesis, and</li> <li>iterate.</li> </ol>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#with-shortcuts","title":".. with shortcuts","text":"<p>The above procedure is for facing a large problem with no clue, for example, when bringing up a new model end-to-end via IREE.</p> <p>Though most of the time, we can leverage existing facilities to avoid going down the full top-down hierarchical debugging procedure. For example, for a regression happening on an existing model, CI or <code>git bisect</code> might tell us directly the culprit commit.</p>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#using-tools","title":".. using tools","text":"<p>For issues with strong signals like crashing, it's also easier to pinpoint the exact culprit with dedicated tools--we can leverage various sanitizers or debuggers.</p>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#isolating-the-problematic-component","title":"Isolating the problematic component","text":"<p>If we are facing a large problem without a clear clue, we need to isolate the problematic compiler or runtime layer first, typically by comparing with a working solution:</p> <p>[correctness/performance]</p> <p>Sanitize the environment first. 
Asking these questions and making sure the environment is proper can save you hours of debugging sometimes:</p> <ul> <li>Did you recently update the GPU SDK or driver?</li> <li>Are others able to reproduce the issue?</li> <li>If not, what SDK / driver versions are they using?</li> <li>Is your machine drawing enough power when benchmarking?</li> <li>Is your machine connected to a monitor (e.g., for Vulkan)?</li> <li>How long since you last rebooted your machine? \ud83d\udc7b</li> </ul> <p>[correctness/performance]</p> <p>We have multiple GPU targets/drivers in IREE--LLVMGPU/CUDA, LLVMGPU/HIP, SPIR-V/Vulkan, SPIR-V/Metal.</p> <p>For the same GPU, we typically have two paths to target, e.g., CUDA/HIP or Vulkan for NVIDIA/AMD GPUs, Metal or Vulkan for Apple GPUs.</p> <p>If one path is correct/performant, we can diff against it to try to isolate the problem--the common/shared compiler/runtime code is likely okay; what differs between paths is likely problematic.</p> <p>[correctness/performance] [vulkan]</p> <p>Vulkan supports different GPUs. Similarly, if one GPU gives correct/performant results, we diff against it to find clues.</p> <p>Even more code in compiler/runtime is shared here; what's problematic is likely different capabilities triggering different CodeGen pipelines so revealing bugs in a particular CodeGen pipeline. Or there are driver issues from a particular vendor.</p> <p>[correctness]</p> <p>If the CPU is working properly, we can use the same dispatch region formation and diff against the CPU dispatches one by one to isolate the problem. See this issue as an example.</p> <p>[correctness]</p> <p><code>--iree-flow-trace-dispatch-tensors</code> and/or <code>--iree-flow-break-dispatch=</code> to <code>iree-compile</code> is quite helpful to inspect the output after all/each dispatch(es).</p> <p>[correctness]</p> <p><code>iree-reduce</code> is a great tool to reduce and isolate issues programmatically. 
See more details here.</p>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#pinpointing-compiler-issues","title":"Pinpointing compiler issues","text":"<p>Once we have identified that the problem is due to some compiler issue, we can investigate by comparing with different paths and inputs:</p> <p>[correctness]</p> <p>For the same dispatch, we may have different CodeGen pipelines, e.g., for matmul we can have simple SIMT pipeline or using tensor/matrix cores. We can try to switch between different pipelines to isolate the problem.</p> <p>[correctness]</p> <p>Assuming we have a small repro, we can also try to see if there are \"patterns\" in the wrong result (e.g., this issue). Or mutate the input to see if the failure has some \"consistency\".</p> <p>[correctness/performance]</p> <p><code>--mlir-print-ir-*</code> and <code>--debug*</code> to <code>iree-opt</code> is our best friend. Sometimes it just takes eyeballing the IRs between stages to find clues.</p> <p>[performance]</p> <p>For identifying performance issues, we typically need to use:</p> <ul> <li>Tracy profiling to get a   coarse-grained command-buffer timing to understand what's the most   time-consuming kernels.   Typical big performance issues include but are not limited to going down an   incorrect CodeGen pipeline, missing tiling/vectorization, having an   improper tiling/vectorization configuration, and so on.   If the coarse-grained information is not enough, then we need to</li> <li>vendor-specific tools to   understand kernel internal counters to identify the bottleneck.</li> </ul>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#pinpointing-runtime-issues","title":"Pinpointing runtime issues","text":"<p>On the other side, if we suspect that it's a runtime issue, here are some useful approaches and tips:</p> <p>[correctness/performance]</p> <p>Tracy profiling is a great way to view how the application runs dynamically. 
It can help to show problematic GPU API call sequences and performance bottlenecks.</p> <ul> <li>It requires adding <code>-DIREE_ENABLE_RUNTIME_TRACING=ON</code> during CMake   configuration, or use the <code>IREE_PY_RUNTIME=tracy</code> environment variable   when invoking IREE runtime installed via Python packages.</li> </ul> <p>[correctness]</p> <p>GPU validation can sometimes give us hints:</p> <ul> <li>[] Enable validation via <code>export METAL_DEVICE_WRAPPER_TYPE=1</code>.</li> <li>[] Use <code>--vulkan_validation_layers=true</code> to <code>iree-run-module</code>, or</li> <li>[] Force enable via environment variables to the Vulkan loader:   <code>export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation</code>   (may additionally need   <code>export VK_LAYER_PATH=$VULKAN_SDK/etc/vulkan/explicit_layer.d</code> and   <code>export LD_LIBRARY_PATH=$VULKAN_SDK/lib</code> if Vulkan SDK is not installed   to a system path).</li> </ul> <p>[correctness]</p> <p>Turning on verbose output can give us more information:</p> <ul> <li>When compiling IREE runtime, add   <code>-DCMAKE_C_FLAGS=-DIREE_VM_EXECUTION_TRACING_FORCE_ENABLE=1</code> in CMake   configuration to enable VM op tracing.</li> <li>[] Use <code>--vulkan_debug_verbosity=4</code> to <code>iree-run-module</code>.</li> <li>[] Print all Vulkan APIs calls with detailed arguments:   <code>export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_api_dump</code>   (may additionally need   <code>export VK_LAYER_PATH=$VULKAN_SDK/etc/vulkan/explicit_layer.d</code> and   <code>export LD_LIBRARY_PATH=$VULKAN_SDK/lib</code> if Vulkan SDK is not installed   to a system path).</li> </ul> <p>[correctness]</p> <p>Try different \"debugging modes\" provided by HAL drivers:</p> <ul> <li>[] Switch <code>--cuda_use_streams=</code> between <code>true</code> and <code>false</code> to   <code>iree-run-module</code> to see whether the issue comes from the stream/graph   command buffer implementation.</li> <li>[] Switch 
<code>--cuda_async_allocations=false</code> to <code>iree-run-module</code> to   see if the issue comes from async allocation.</li> <li>[] Use <code>--metal_serial_command_dispatch=true</code>,   <code>--metal_command_buffer_retain_resources=true</code>, or   <code>--metal_resource_hazard_tracking=true</code> to <code>iree-run-module</code> to see   if any of the above \"fixes\" the issue.   It can help to isolate the pontential problem.</li> <li>[] Use <code>--vulkan_robust_buffer_access=true</code> to <code>iree-run-module</code>   especially when seeing undeterministic/corrupted contents in buffers and   suspecting there are buffer allocation/indexing issues.</li> </ul>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/integration-tests/","title":"Integration test debugging","text":"<p>This document includes tips for triaging integration test correctness issues. Feel free to reach out to @hanhanW or ask questions on Discord for more help.</p>"},{"location":"developers/debugging/integration-tests/#general-tips","title":"General tips","text":""},{"location":"developers/debugging/integration-tests/#narrow-down-reproducers","title":"Narrow down reproducers","text":"<ul> <li>Models themselves can be large, and IREE breaks models into dispatches/kernels and then launches those individually. Program outputs could diverge starting from any individual launch. To get a smaller reproducer, you can use --iree-flow-trace-dispatch-tensors.</li> <li>You can compare the logs between builds/backends to get an idea about which dispatch results in wrong outputs. The dumped inputs can be reused in a flagfile.</li> </ul> <p>Once a suspicious dispatch is identified, we can create a test case based on the dispatch function. The dispatch function can be derived after the <code>OutlineDispatchRegions</code> pass. The function signatures have to be modified manually. 
You'll have to put <code>flow.dispatch.tensor.load</code> variables to function arguments, and replace <code>flow.dispatch.tensor.store</code> with <code>return</code> op.</p> <p>Note: This only works when dispatch formation logics are identical between runs.</p>"},{"location":"developers/debugging/integration-tests/#iree-experimental-repository-tests","title":"iree-experimental repository tests","text":"<p>Follow README to run the model. The MLIR files will be generated. You'll find the saved file from log. E.g.,</p> <pre><code>[ RUN      ] MobilenetV2Int8Test.test_compile_tflite\nI0401 17:27:04.084272 140182373025024 test_util.py:119] Setting up for IREE\nI0401 17:27:04.085064 140182373025024 binaries.py:218] Invoke IREE Pipeline:\n  /tmp/iree-experimental/iree-experimental.venv/lib/python3.9/site-packages/iree/tools/tflite/iree-import-tflite\n    /tmp/iree-experimental/tflitehub/tmp/mobilenet_v2_int8_test.py/model.tflite\n    --mlir-print-debuginfo\n    --save-temp-tfl-input=/tmp/iree-experimental/tflitehub/tmp/mobilenet_v2_int8_test.py/tflite.mlir\n    --save-temp-iree-input=/tmp/iree-experimental/tflitehub/tmp/mobilenet_v2_int8_test.py/tosa.mlir\n</code></pre> <p>Unfortunately, the artifacts are not dumped in the runs. There is an issue for tracking this. A workaround can be found in the issue.</p>"},{"location":"developers/debugging/integration-tests/#tensorflow-integration-tests","title":"TensorFlow integration tests","text":"<p>These are steps to reproduce/address failures in TF/TFLite integration tests. 
These instructions are most stable on Linux, though they may work with a few tweaks on Windows and macOS.</p> <p>All steps here assume starting from the IREE root directory.</p> <ol> <li> <p>First create a Python virtual environment to install packages into:</p> <pre><code>python -m venv iree-tf.venv\nsource iree-tf.venv/bin/activate\n\n# Install test requirements\npython -m pip install -r ./integrations/tensorflow/test/requirements.txt\n</code></pre> </li> <li> <p>Install IREE's tools and Python bindings or build them from source</p> <p>Install distributed packages</p> <pre><code># Install packages from nightly releases\n# This should work for most cases, as the importers change infrequently\npython -m pip install \\\n  iree-compiler iree-runtime iree-tools-tf iree-tools-tflite \\\n  --find-links https://iree.dev/pip-release-links.html\n</code></pre> <p>OR build from source</p> <pre><code># Build Python bindings from source\ncmake -G Ninja -B ../iree-build/ -DIREE_BUILD_PYTHON_BINDINGS=ON .\ncmake --build ../iree-build/\n\n# Add IREE built-from-source Python packages to PYTHONPATH\nsource .env\n\n# Install IREE TF/TFLite Python packages\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n</code></pre> </li> <li> <p>Run the python test command line</p> <p>The command can be obtained from the run file. 
For example, if <code>iree_tfl_tests/llvmcpu_posenet_i8.run</code> failed,</p> <pre><code>cd integrations/tensorflow/test/\ncat iree_tfl_tests/llvmcpu_posenet_i8.run\n\n# REQUIRES: llvmcpu\n# RUN: %PYTHON -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=%t\n\ncd python/\npython -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=/tmp/posenet_i8_failure\n</code></pre> <p>Note that the command can only be run under the <code>integrations/tensorflow/test/python</code> directory.</p> </li> <li> <p>Extract intermediate files and use with native tools</p> <p>The test will create an <code>iree_input.mlir</code> in the temp directory specified. Those can then be fed into <code>iree-compile</code> (built locally to reproduce the error)</p> <pre><code>iree-compile \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-input-type=stablehlo \\\n  iree_input.mlir\n</code></pre> </li> </ol>"},{"location":"developers/debugging/releases/","title":"Release debugging playbook","text":""},{"location":"developers/debugging/releases/#tools-and-locations","title":"Tools and Locations","text":"<ul> <li><code>.github/workflows/build_package.yml</code>: Release packaging jobs</li> <li><code>build_tools/github_actions/build_dist.py</code>: Main script to build various   release packages (for all platforms). We usually use this when reproing to   approximate exactly what the CI does. Assumes a subdirectory of <code>c</code>   and writes builds to <code>iree-build</code> and <code>iree-install</code> as a peer of it. To use   locally, just symlink your source dir as <code>c</code> in an empty   directory (versus checking out).</li> </ul>"},{"location":"developers/debugging/releases/#mapping-releases-back-to-git-commits","title":"Mapping releases back to git commits","text":"<p>The source IREE commit SHA is embedded into pip releases in a few places. 
Starting in a python venv, you can find the IREE commit from both the shell:</p> <pre><code>\"$(find . -name 'iree-compile' -executable)\" --version\nIREE (https://iree.dev):\n  IREE compiler version 20231016.553 @ f1cb2692a086738d7f16274b9b3af6d2c15ef133\n  LLVM version 18.0.0git\n  Optimized build\n</code></pre> <p>and the Python API:</p> <pre><code>python -c \"import iree.compiler.version as v; print(v.REVISIONS['IREE'])\"\nf1cb2692a086738d7f16274b9b3af6d2c15ef133\n</code></pre>"},{"location":"developers/debugging/releases/#manylinux-releases","title":"Manylinux releases","text":"<p>The Linux releases are done in a manylinux2014 docker container. At the time of this writing, it has gcc 9.3.1 and Python versions 3.5 - 3.9 under <code>/opt/python</code>. Note that this docker image approximates a 2014 era RHEL distro, patched with backported (newer) dev packages. It builds with gcc and BFD linker unless if you arrange otherwise. <code>yum</code> can be used to get some packages.</p> <p>Get a docker shell (see exact docker image in build_package.yml workflow):</p> <pre><code>docker run --rm -it -v $(pwd):/work/c stellaraccident/manylinux2014_x86_64-bazel-4.2.2:latest /bin/bash\n</code></pre> <p>Remember that docker runs as root unless if you take steps otherwise. Don't touch write files in the <code>/work/c</code> directory to avoid scattering root owned files on your workstation.</p> <p>The default system Python is 2.x, so you must select one of the more modern ones:</p> <pre><code>export PATH=/opt/python/cp39-cp39/bin:$PATH\n</code></pre> <p>Build core installation:</p> <pre><code># (from within docker)\ncd /work\npython ./c/build_tools/github_actions/build_dist.py main-dist\n\n# Also supports:\n#   main-dist\n#   py-runtime-pkg\n#   py-xla-compiler-tools-pkg\n#   py-tflite-compiler-tools-pkg\n#   py-tf-compiler-tools-pkg\n</code></pre> <p>You can <code>git bisect</code> on the host and keep running the above in the docker container. 
Note that every time you run <code>build_dist.py</code>, it deletes the cmake cache but otherwise leaves the build directory (so it pays the configure cost but is otherwise incremental). You can just <code>cd iree-build</code> and run <code>ninja</code> for faster iteration (after the first build or if changing cmake flags). Example:</p> <p>Extended debugging in the manylinux container:</p> <pre><code>cd /work/iree-build\n# If doing extended debugging in the container, these may make you happier.\nyum install ccache devtoolset-9-libasan-devel gdb\n\n# Get an LLVM symbolizer.\nyum install llvm9.0\nln -s /usr/bin/llvm-symbolizer-9.0 /usr/bin/llvm-symbolizer\n\n# You can manipulate cmake flags. These may get you a better debug experience.\ncmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DIREE_ENABLE_ASAN=ON -DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=gold -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .\n\nninja\n\n# Or you may need this if buggy LLVM tools (like mlir-tblgen) are leaking :(\nASAN_OPTIONS=\"detect_leaks=0\" ninja\n</code></pre> <p>Other tips:</p> <ul> <li>If debugging the runtime, you may have a better time just building the   Release mode <code>main-dist</code> package above once, which will drop binaries in the   <code>iree-install</code> directory. Then build the <code>py-runtime-pkg</code> or equiv and   iterate further in the build directory. Ditto for TF/XLA/etc.</li> </ul>"},{"location":"developers/debugging/releases/#testing-releases-on-your-fork","title":"Testing releases on your fork","text":"<p>To avoid interrupting the regular releases published on the IREE github, you can test any changes to the release process on your own fork.  Some setup is required before these github actions will work on your fork and development branch.</p> <p>You can run <code>schedule_candidate_release.yml</code> with a workflow dispatch from the actions tab. 
If you want to test using a commit other than the latest green on your <code>main</code> branch, modify the section that identifies the latest green commit to search from another commit or just hardcode one.</p> <p>To speed up <code>build_package.yml</code>, you may want to comment out some of the builds here. The <code>py-pure-pkgs</code> build takes only ~2 minutes and the <code>py-runtime-pkg</code> build takes ~5, while the others can take several hours.</p> <p>From your development branch, you can manually run the Schedule Snapshot Release action, which invokes the Build Release Packages action, which finally invokes the Validate and Publish Release action.  If you already have a draft release and know the release id, package version, and run ID from a previous Build Release Packages run, you can also manually run just the Validate and Publish Release action.</p>"},{"location":"developers/debugging/sanitizers/","title":"Sanitizers (ASan/MSan/TSan)","text":"<p>AddressSanitizer, MemorySanitizer and ThreadSanitizer are tools provided by <code>clang</code> to detect certain classes of errors in C/C++ programs. They consist of compiler instrumentation (so your program's executable code is modified) and runtime libraries (so e.g. the <code>malloc</code> function may get replaced).</p> <p>They are abbreviated as \"ASan\", \"MSan\" and \"TSan\" respectively.</p> <p>They all incur large overhead, so only enable them while debugging.</p> Tool Detects Helps debug what? Slowdown Memory overhead Android support ASan Out-of-bounds accesses, use-after-free, use-after-return, memory leaks Crashes, non-deterministic results, memory leaks 2x 3x Yes MSan Uninitialized memory reads Non-deterministic results 3x ? Yes TSan Data races Many bugs in multi-thread code 5x-15x 5x-10x No <p>Note</p> <p>See this documentation on leak detection. 
It is only enabled by default on some platforms.</p>"},{"location":"developers/debugging/sanitizers/#support-status-and-how-to-enable-each-sanitizer","title":"Support status and how to enable each sanitizer","text":""},{"location":"developers/debugging/sanitizers/#asan-addresssanitizer","title":"ASan (AddressSanitizer)","text":"<p>To enable ASan:</p> <pre><code>cmake -DIREE_ENABLE_ASAN=ON ...\n</code></pre> <p>Several <code>_asan</code> tests like <code>iree/tests/e2e/stablehlo_ops/check_llvm-cpu_local-task_asan_abs.mlir</code> are also defined when using this configuration. These tests include AddressSanitizer in compiled CPU code as well by using these <code>iree-compile</code> flags:</p> <pre><code>--iree-llvmcpu-link-embedded=false\n--iree-llvmcpu-sanitize=address\n</code></pre>"},{"location":"developers/debugging/sanitizers/#linking-to-the-dynamic-asan-runtime","title":"Linking to the dynamic ASan runtime","text":"<p>You may want to use ASan when using the python bindings. One way to achieve this is to build Python (or whatever executable that is going to use IREE as a shared library) with Asan. Another option is to link to the ASan runtime dynamically instead of linking it statically into an executable.</p> <p>Using clang-12 (other versions should also work) as a example, configure IREE with something like:</p> <pre><code>cmake \\\n  -DIREE_ENABLE_ASAN=ON \\\n  -DCMAKE_EXE_LINKER_FLAGS=-shared-libasan \\\n  -DCMAKE_SHARED_LINKER_FLAGS=-shared-libasan \\\n  -DCMAKE_C_COMPILER=clang-12 \\\n  -DCMAKE_CXX_COMPILER=clang++-12 \\\n  ...\n</code></pre> <p>Then when running things the ASan runtime will have to be preloaded.</p> <pre><code>LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.0/lib/linux/libclang_rt.asan-x86_64.so \\\nASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer \\\n  python ...\n</code></pre> <p>On Ubuntu the corresponding ASan runtime is provided by a package like <code>libclang-common-12-dev</code> depending on your Clang version. 
E.g.</p> <pre><code>sudo apt install libclang-common-12-dev llvm-12 clang-12\n</code></pre> <p>Note that during building would also need to preload the ASan runtime, since the build executes its own binaries that are linked against the runtime.</p> <pre><code>LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.0/lib/linux/libclang_rt.asan-x86_64.so \\\nASAN_OPTIONS=detect_leaks=0 \\\nASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer \\\n  cmake --build ...\n</code></pre> <p>Tip</p> <p>If you want to run the IREE CUDA runtime driver it is likely you would need.</p> <pre><code>ASAN_OPTIONS=\"protect_shadow_gap=0\"\n</code></pre> <p>Like this</p> <pre><code>LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.0/lib/linux/libclang_rt.asan-x86_64.so \\\nASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer \\\nASAN_OPTIONS=\"protect_shadow_gap=0\" \\\n  python ...\n</code></pre>"},{"location":"developers/debugging/sanitizers/#tsan-threadsanitizer","title":"TSan (ThreadSanitizer)","text":""},{"location":"developers/debugging/sanitizers/#c-standard-library-with-tsan-support","title":"C++ Standard Library with TSan support","text":"<p>For best results to avoid false positives/negatives TSan needs all userspace code to be compiled with Tsan. This includes <code>libstdc++</code> or <code>libc++</code>. libstdc++ is usually the default C++ runtime on Linux.</p> <p>Building GCC's 12 libstdc++ on Ubuntu 22.04 with Clang has build errors. It seems that GCC and Clang shared their TSan implementation. They may be interoperable, but to avoid problems we should build everything with GCC. 
This means using GCC both as a compiler and linker.</p>"},{"location":"developers/debugging/sanitizers/#build-libstdc-with-tsan-support","title":"Build libstdc++ with TSan support","text":"<p>Get GCC 12.3 source code.</p> <pre><code>git clone --depth 1 --branch releases/gcc-12.3.0 \\\n  https://github.com/gcc-mirror/gcc.git\n</code></pre> <pre><code>SRC_DIR=$PWD/gcc\nBIN_DIR=$PWD/gcc/build\n</code></pre> <p>Building all dependencies of libstdc++ with TSan has errors during linking of <code>libgcc</code>. libgcc is a dependency of libstdc++. It is desirable to build everything with TSan, but it seems this excludes libgcc, as the TSan runtime <code>libtsan</code> has it as a dependency. We build it without TSan. We do that to make libstdc++'s configuration find <code>gthr-default.h</code>, which is generated during building of libgcc. If not found C++ threads will silently have missing symbols.</p> <pre><code>LIBGCC_BIN_DIR=$BIN_DIR/libgcc\nmkdir -p $LIBGCC_BIN_DIR\ncd $LIBGCC_BIN_DIR\n\n$SRC_DIR/configure \\\n  CC=gcc-12 \\\n  CXX=g++-12 \\\n  --disable-multilib \\\n  --disable-bootstrap \\\n  --enable-languages=c,c++\n\nmake -j$(nproc) --keep-going all-target-libgcc\n</code></pre> <p>Now build libstdc++.</p> <pre><code>LIBSTDCXX_BIN_DIR=$BIN_DIR/libstdc++\nmkdir -p $LIBSTDCXX_BIN_DIR\nLIBSTDCXX_INSTALL_DIR=$BIN_DIR/install/libstdc++\nmkdir -p $LIBSTDCXX_INSTALL_DIR\n\nGTHREAD_INCLUDE_DIR=$LIBGCC_BIN_DIR/x86_64-pc-linux-gnu/libgcc\nCXX_AND_C_FLAGS=\"-I$GTHREAD_INCLUDE_DIR -g -fno-omit-frame-pointer -fsanitize=thread\"\n\ncd $LIBSTDCXX_BIN_DIR\n$SRC_DIR/libstdc++-v3/configure \\\n  CC=gcc-12 \\\n  CXX=g++-12 \\\n  CFLAGS=\"$CXX_AND_C_FLAGS\" \\\n  CXXFLAGS=\"$CXX_AND_C_FLAGS\" \\\n  LDFLAGS=\"-fsanitize=thread\" \\\n  --prefix=$LIBSTDCXX_INSTALL_DIR \\\n  --disable-multilib \\\n  --disable-libstdcxx-pch \\\n  --enable-libstdcxx-threads=yes \\\n  --with-default-libstdcxx-abi=new\n\nmake -j$(nproc)\nmake install\n</code></pre> <p>When running programs you would need to 
use the sanitized version of libstdc++.</p> <pre><code>LD_LIBRARY_PATH=\"$LIBSTDCXX_INSTALL_DIR/lib\" \\\n  my-program ...\n</code></pre>"},{"location":"developers/debugging/sanitizers/#iree-with-tsan-support","title":"IREE with TSan support","text":"<p>To enable TSan:</p> <pre><code>cmake -DIREE_ENABLE_TSAN=ON ...\n</code></pre> <p>Several <code>_tsan</code> tests like <code>iree/tests/e2e/stablehlo_ops/check_llvm-cpu_local-task_tsan_abs.mlir</code> are also defined when using this configuration. These tests include ThreadSanitizer in compiled CPU code as well by using these <code>iree-compile</code> flags:</p> <pre><code>--iree-llvmcpu-link-embedded=false\n--iree-llvmcpu-sanitize=thread\n</code></pre> <p>Note that an IREE runtime built with TSan cannot load an IREE compiled LLVM/CPU module unless those flags are used, so other tests are excluded using the <code>notsan</code> label.</p>"},{"location":"developers/debugging/sanitizers/#msan-memorysanitizer","title":"MSan (MemorySanitizer)","text":"<p>In theory that should be a simple matter of</p> <pre><code>-DIREE_ENABLE_MSAN=ON\n</code></pre> <p>However, that requires making and using a custom build of libc++ with MSan as explained in this documentation.</p> <p>As of April 2022, all of IREE's tests succeeded with MSan on Linux/x86-64, provided that the <code>vulkan</code> driver was disabled (due to lack of MSan instrumentation in the NVIDIA Vulkan driver).</p>"},{"location":"developers/debugging/sanitizers/#ubsan-undefinedbehaviorsanitizer","title":"UBSan (UndefinedBehaviorSanitizer)","text":"<p>Enabling UBSan in the IREE build is a simple matter of setting the <code>IREE_ENABLE_UBSAN</code> CMake option:</p> <pre><code>cmake -DIREE_ENABLE_UBSAN=ON ...\n</code></pre> <p>Note that both ASan and UBSan can be enabled in the same build.</p>"},{"location":"developers/debugging/sanitizers/#symbolizing-the-reports","title":"Symbolizing the 
reports","text":""},{"location":"developers/debugging/sanitizers/#desktop-platforms","title":"Desktop platforms","text":"<p>On desktop platforms, getting nicely symbolized reports is covered in this documentation. The gist of it is to make sure that <code>llvm-symbolizer</code> is in your <code>PATH</code>, or make the <code>ASAN_SYMBOLIZER_PATH</code> environment variable point to it.</p>"},{"location":"developers/debugging/sanitizers/#android","title":"Android","text":"<p>On Android it's more complicated due to this Android NDK issue. Fortunately, we have a script to perform the symbolization. Copy the raw output from the sanitizer and feed it into the <code>stdin</code> of the <code>build_tools/scripts/android_symbolize.sh</code> script, with the <code>ANDROID_NDK</code> environment variable pointing to the NDK root directory, like this:</p> <pre><code>ANDROID_NDK=~/android-ndk-r21d ./build_tools/scripts/android_symbolize.sh &lt; /tmp/asan.txt\n</code></pre> <p>Where <code>/tmp/asan.txt</code> is where you've pasted the raw sanitizer report.</p> <p>Tip</p> <p>This script will happily just echo any line that isn't a stack frame. That means you can feed it the whole <code>ASan</code> report at once, and it will output a symbolized version of it. DO NOT run it on a single stack at a time! That is unlike the symbolizer tool that's being added in NDK r22, and one of the reasons why we prefer to keep our own script. For more details see this comment.</p>"},{"location":"developers/design-docs/cuda-hal-driver/","title":"CUDA HAL driver","text":"<p>This document lists technical details regarding the CUDA implementation of IREE's Hardware Abstraction Layer, called a CUDA HAL driver.</p> <p>IREE provides a Hardware Abstraction Layer (HAL) as a common interface to different compute accelerators. 
IREE HAL's design draws inspiration from modern GPU architecture and APIs; so implementing a HAL driver using CUDA is mostly straightforward; though there are places we need emulation given no direct mapping concepts or mechanisms.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#overall-design-choices","title":"Overall design choices","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#cuda-driver-vs-runtime-api","title":"CUDA driver vs runtime API","text":"<p>IREE HAL's design draws inspiration from modern GPU APIs--it provides explicit control of low-level GPU objects. The compiler is expected to plan the object lifetime and schedule workload and synchronization in an optimized way; IREE HAL implementation and the underlying GPU driver stack is expected to be a thin layer without much smarts and magic.</p> <p>Therefore when implementing the IREE HAL using CUDA, we use the driver API instead of the runtime API. At runtime the HAL CUDA driver will load the <code>libcuda.so</code>/<code>nvcuda.dll</code> library dynamically and query a subset of the CUDA driver API used in HAL via the <code>cuGetProcAddress()</code> API.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#gpu-objects","title":"GPU Objects","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#driver","title":"Driver","text":"<p>There is no direct CUDA construct that map to the IREE HAL <code>iree_hal_driver_t</code> abstraction. We use it to hold the dynamic symbols loaded for all devices, and device enumeration and creation.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#device","title":"Device","text":"<p><code>iree_hal_cuda_device_t</code> implements <code>iree_hal_device_t</code> to provide the interface to CUDA GPU device by wrapping a <code>CUdevice</code>. 
For each device, right now we create two <code>CUstream</code>s--one for issuing commands for memory allocation and kernel launches as instructed by the program; the other for issuing host callback functions after dispatched command buffers complete. See synchronization section regarding the details.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#async-allocation","title":"Async allocation","text":"<p>The CUDA HAL driver supports async allocation (<code>iree_hal_device_queue_alloca()</code> and <code>iree_hal_device_queue_dealloca()</code>) via CUDA stream ordered memory allocation.</p> <p>The <code>async_allocations</code> in the <code>iree_hal_cuda_device_params_t</code> struct allows to enable this feature.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#command-buffer","title":"Command buffer","text":"<p><code>iree_hal_command_buffer_t</code> is a recording of commands to issue to the GPU; when the command buffer is submitted to the device it's then actually executed on the GPU asynchronously.</p> <p>Two implementations of <code>iree_hal_command_buffer_t</code> exist in the CUDA HAL driver--one backed by <code>CUgraph</code> and the other backed by <code>CUstream</code>.</p> <p><code>CUgraph</code> conceptually matches <code>iree_hal_command_buffer_t</code> better given it's a recording of commands to issue to the GPU. Also using the <code>CUgraph</code> API allows to easily encode fine grain dependencies between dispatch without having to create multiple streams. Therefore, the <code>CUgraph</code>-backed implementation is a more natural one. Though note that <code>CUgraph</code> API is meant to be used for recording once and replaying multiple times and there may be a performance penalty to using <code>CUgraph</code> API for one-shot command buffer.</p> <p>The <code>CUstream</code>-backed implementation just issues commands directly to a <code>CUstream</code> when recording. 
Commands issued to <code>CUstream</code> can be immediately sent to the GPU for execution; there is no recording and replaying separation. In order to match the recording semantics of <code>iree_hal_command_buffer_t</code>, to use the <code>CUstream</code>-backed command buffer, we need to first record the command buffer into an in-memory <code>iree_hal_deferred_command_buffer_t</code>, and then when applying the command buffer, we create a new <code>CUstream</code>-backed implementation.</p> <p>The <code>command_buffer_mode</code> in the <code>iree_hal_cuda_device_params_t</code> struct allows to select which implementation to use.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#allocator","title":"Allocator","text":"<p>The allocator will forward allocation requests to <code>cuMemHostAlloc()</code> for host local memory, <code>cuMemAlloc()</code> for device local and host invisible memory, and <code>cuMemAllocManaged()</code> for device local and host visible memory.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#buffer","title":"Buffer","text":"<p>CUDA buffers are represented either as a host pointer or a device pointer of type <code>CUdeviceptr</code>.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#executable","title":"Executable","text":"<p><code>iree_hal_executable_t</code> maps naturally to <code>CUmodule</code>.</p> <p>The compiler generates a FlatBuffer containing a PTX image as well as a list of entry point functions and their associated metadata (names, workgroup size, dynamic shared memory size, etc.). 
At runtime, the CUDA HAL driver loads the PTX image and creates <code>CUfunction</code>s out of it for various entry points.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#synchronization","title":"Synchronization","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#event","title":"Event","text":"<p><code>iree_hal_event_t</code> right now is not used in the compiler so it's not yet implemented in the CUDA HAL driver.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#semaphore","title":"Semaphore","text":"<p>The IREE HAL uses semaphores to synchronize work between host CPU threads and device GPU streams. It's a unified primitive that covers all directions--host to host, host to device, device to host, and device to device, and allows flexible signal and wait ordering--signal before wait, or wait before signal. There is no limit on the number of waits of the same value too.</p> <p>The core state of a HAL semaphore consists of a monotonically increasing 64-bit integer value, which forms a timeline--signaling the semaphore to a larger value advances the timeline and unblocks work waiting on some earlier values. The semantics closely mirrors Vulkan timeline semaphore.</p> <p>In CUDA, there are no direct equivalent primitives providing all the capabilities needed by the HAL semaphore abstraction:</p> <ul> <li>Stream memory operations provide <code>cuStreamWriteValue64()</code> and   <code>cuStreamWaitValue64()</code>, which can implement HAL semaphore 64-bit integer value   signal and wait. Though these operations require device pointers and cannot   accept pointers to managed memory buffers, meaning no support for the host.   Additionally, per the spec, \"synchronization ordering established through   these APIs is not visible to CUDA. 
CUDA tasks that are (even indirectly)   ordered by these APIs should also have that order expressed with   CUDA-visible dependencies such as events.\" So it's not suitable for   integration with other CUDA components.</li> <li>For external resource interoperability, we have APIs   like <code>cuSignalExternalSemaphoresAsync()</code> and <code>cuWaitExternalSemaphoresAsync()</code>,   which can directly map to Vulkan timeline semaphores. Though these APIs are   meant to handle external resources--there is no way to create   <code>CUexternalSemaphore</code> objects directly other than <code>cuImportExternalSemaphore()</code>.</li> </ul> <p>Therefore, to implement the support, we need to leverage multiple native CPU or CUDA primitives under the hood.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#cuevent-capabilities","title":"<code>CUevent</code> capabilities","text":"<p>The main synchronization mechanism is CUDA event--<code>CUevent</code>. As a functionality and integration baseline, we use <code>CUevent</code> to implement the IREE HAL semaphore abstraction.</p> <p><code>CUevent</code> natively supports the following capabilities:</p> <ul> <li>State: binary; either unsignaled or signaled. There can exist multiple   waits (e.g., via <code>cuEventSynchronize()</code> or <code>cuGraphAddEventWaitNode()</code>) for   the same <code>CUevent</code> signal (e.g., via <code>cuEventRecord()</code> or   <code>cuGraphAddEventRecordNode()</code>).</li> <li>Ordering: must be signal before wait. Waiting before signal would mean   waiting on an empty set of work, or previously recorded work.</li> <li>Direction: device to device, device to host.</li> </ul> <p>We need to fill the remaining capability gaps. Before going into details, the overall approach would be to:</p> <ul> <li>State: we need a 64-bit integer value timeline. 
Given the binary state of   a <code>CUevent</code>, each <code>CUevent</code> would just be a \"timepoint\" on the timeline.</li> <li>Ordering: we need to defer releasing the workload to the GPU until the   semaphore waits are reached on the host, or we can have some device   <code>CUevent</code> to wait on.</li> <li>Direction: host to host and host to device is missing; we can support that   with host synchronization mechanisms.</li> </ul>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#signal-to-wait-analysis","title":"Signal to wait analysis","text":"<p>Concretely, for a given HAL semaphore, looking at the four directions:</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#cpu-signal","title":"CPU signal","text":"<p>A CPU thread signals the semaphore timeline to a new value.</p> <p>If there are CPU waits, it is purely on the CPU side. We just need to use common CPU notification mechanisms. In IREE we have <code>iree_event_t</code> wrapping various low-level OS primitives for it. So we can just use that to represent a wait timepoint. We need to keep track of all CPU wait timepoints in the timeline. After a new signaled value, go through the timeline and notify all those waiting on earlier values.</p> <p>If there are GPU waits, given that there are no way we can signal a <code>CUevent</code> on CPU, one way to handle this is to cache and defer the submission batches by ourselves until CPU signals past the desired value. To support this, we would need to implement a deferred/pending actions queue.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#gpu-signal","title":"GPU signal","text":"<p>GPU signals can only be through a <code>CUevent</code> object, which has a binary state. We need to advance the timeline too. One way is to use <code>cuLaunchHostFunc()</code> to advance from the CPU side with <code>iree_hal_semaphore_list_signal()</code>. 
This additionally would mean we can reuse the logic from CPU signaling to unblock CPU waits.</p> <p>After advancing the timeline from the CPU side with <code>cuLaunchHostFunc()</code>, we can release more workload from the deferred/pending actions queue to the GPU. Though, per the documentation of <code>cuLaunchHostFunc()</code>, \"the host function must not make any CUDA API calls.\" So we cannot do that directly inside <code>cuLaunchHostFunc()</code>; we need to notify another separate thread to call CUDA APIs to push more work to the GPU. So the deferred/pending action queue should have an associated thread.</p> <p>For GPU waits, we can also leverage the same logic--using CPU signaling to unblock deferred GPU queue actions. However, this is not performant, given that the CPU is involved for GPU internal synchronization. We want to use <code>CUevent</code> instead:</p> <ul> <li>We keep track of all GPU signals in the timeline. Once we see a GPU wait   request, try to scan the timeline to find a GPU signal that advances the   timeline past the desired value, and use that for waiting instead. (This   actually applies to CPU waits too, and it's an optimization over pure   CPU side <code>iree_event_t</code> polling.)</li> <li>We may not see GPU signal before seeing GPU wait requests, then we can also   keep track of all GPU waits in the timeline. Later, once we see either a CPU   signal or GPU signal advancing past the waited value, we can handle them   accordingly--submitting immediately or associating the <code>CUevent</code>.   This would also guarantee the requirement of <code>CUevent</code>--recording should   happen before waiting.</li> <li>We can use the same <code>CUevent</code> to unblock multiple GPU waits. That's allowed,   though it would mean we need to be careful regarding <code>CUevent</code> lifetime   management. 
Here we can use reference counting to see how many timepoints   are using it and automatically return to a pool once done.</li> </ul> <p>Another problem is that per the <code>cuLaunchHostFunc()</code> doc, \"the function will be called after currently enqueued work and will block work added after it.\" We don't want the blocking behavior involving the host. So we can use a dedicated <code>CUstream</code> for launching the host function, waiting on the <code>CUevent</code> from the original stream too. We can also handle resource deallocation together there.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#data-structures","title":"Data structures","text":"<p>To summarize, we need the following data structures to implement HAL semaphore:</p> <ul> <li><code>iree_event_t</code>: CPU notification mechanism wrapping low-level OS primitives.   Used by host wait timepoints.</li> <li><code>iree_event_pool_t</code>: a pool for CPU <code>iree_event_t</code> objects to recycle.</li> <li><code>iree_hal_cuda_event_t</code>: GPU notification mechanism wrapping a <code>CUevent</code> and   a reference count. Used by device signal and wait timepoints. Associates with   an <code>iree_hal_cuda_event_pool_t</code> pool--returns to the pool directly once the   reference count goes to 0.</li> <li><code>iree_hal_cuda_event_pool_t</code>: a pool for GPU <code>iree_hal_cuda_event_t</code> objects   to recycle.</li> <li><code>iree_hal_cuda_timepoint_t</code>: an object that wraps a CPU <code>iree_event_t</code> or   GPU <code>iree_hal_cuda_event_t</code> to represent wait/signal of a timepoint on a   timeline.</li> <li><code>iree_hal_cuda_timepoint_pool_t</code>: a pool for <code>iree_hal_cuda_timepoint_t</code>   objects to recycle. 
This pool builds upon the CPU and GPU event pool--it   acquires CPU/GPU event objects there.</li> <li><code>iree_hal_cuda_timeline_semaphore_t</code>: contains a list of CPU wait and GPU   wait/signal timepoints.</li> <li><code>iree_hal_cuda_queue_action_t</code>: a pending queue action (kernel launch or   stream-ordered allocation).</li> <li><code>iree_hal_cuda_pending_queue_actions_t</code>: a data structure to manage pending   queue actions. It provides APIs to enqueue actions, and advance the queue on   demand--queue actions are released to the GPU when all their wait semaphores   are signaled past the desired value, or we can have a <code>CUevent</code> object already   recorded to some <code>CUstream</code> to wait on.</li> </ul>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/design-roadmap/","title":"Design roadmap","text":"<p>A not-so-concise walkthrough of various IREE features that are in the design process and planned for future versions. A lot of the questions around how the IREE IR is designed and why certain components exist (such as the VM) hopefully become much clearer when seeing where we want to go with the infrastructure we are building (as opposed to where we currently are with our MVP slice). 
This document is not meant to encompass the entire design of any individual feature and if there's interest please say hi on the iree-discuss mailing list.</p> <ul> <li>Design roadmap<ul> <li>Input Dialects<ul> <li>Quantization</li> </ul> </li> <li>flow: Data- and Execution-Flow Modeling<ul> <li>Avoiding Readbacks with flow.stream</li> <li>Threading flow.stream through the CFG</li> <li>Predication of flow.dispatch</li> <li>Deduping flow.executables</li> <li>Rematerializing CSE'd Expressions</li> <li>Device Placement</li> </ul> </li> <li>hal: Hardware Abstraction Layer and Multi-Architecture Executables<ul> <li>Allow Targets to Specify hal.interfaces</li> <li>Target-specific Scheduling Specialization</li> <li>Buffer Usage Tracking</li> <li>Batched Executable Caching and Precompilation</li> <li>Target-aware Executable Compression</li> <li>Target-aware Constant Compression</li> <li>Command Buffer Stateful Deduplication</li> <li>Resource Timeline</li> <li>Transient Tensor Ringbuffer</li> <li>Timeline Semaphores on the Module ABI</li> <li>GPU-like CPU Scheduling</li> </ul> </li> <li>vm: Lightweight Virtual Machine<ul> <li>Coroutines for Batching and Cooperative Scheduling<ul> <li>Cellular Batching</li> </ul> </li> <li>Lowering to LLVM IR</li> <li>Improved Type Support</li> <li>Indirect Command Buffer/On-Accelerator Execution</li> </ul> </li> </ul> </li> </ul>"},{"location":"developers/design-docs/design-roadmap/#input-dialects","title":"Input Dialects","text":""},{"location":"developers/design-docs/design-roadmap/#quantization","title":"Quantization","text":"<p>It's assumed that any work related to quantization/compression has happened prior to lowering into IREE dialects. Our plan is to use the proposed Quantization Transforms to achieve both training and inference-time quantization of types in a way that preserves maximum accuracy. 
IREE will support running with original unquantized floats in all cases, allowing for a smooth on-ramp to quantization and the gains in performance and reduction in model size that come from it.</p> <p>As future work IREE would like to move beyond these transformation-directed approaches to quantization and interface directly to frontends which have a defined enough type system to represent accurate quantized (and otherwise compressed) computations directly, not relying exclusively on compiler-side type inference transforms.</p>"},{"location":"developers/design-docs/design-roadmap/#flow-data-and-execution-flow-modeling","title":"<code>flow</code>: Data- and Execution-Flow Modeling","text":"<p>The <code>flow</code> dialect is designed to allow us to extract as much concurrency as possible from a program and partition IR into the scheduling and execution domains. Today we have the IR structure and transformation flow in place but have not yet got to the most interesting things such an infrastructure enables. A majority of the largest performance, latency, and memory usage improvements IREE can offer are determined first here and all following lowerings benefit. The fastest code is the code you don't execute and the smallest allocation is the allocation you don't make ;)</p>"},{"location":"developers/design-docs/design-roadmap/#avoiding-readbacks-with-flowstream","title":"Avoiding Readbacks with <code>flow.stream</code>","text":"<p>A majority of the readbacks we have today (manifested as <code>flow.tensor.load.*</code> ops) will be removed when we have an HLO tensor-&gt;primitive conversion. There will still be cases when readbacks are required for correctness but they usually fall into a small set of usage patterns. For those that don't this is one place where IREE will warn about performance issues, allowing programs that perform suboptimally but encouraging authors to adjust their input model to enable better behavior. 
The IREE VM also has specific support for hiding readback latency in an efficient way via coroutines.</p> <p>The most common case we are currently seeing in the IR is that of dynamic copies where the offsets are dependent on the result of previous computations. Source models may have top-k + gather operations, for example. These appear as a <code>flow.stream</code>, a <code>flow.tensor.load</code>, and then another <code>flow.stream</code> that uses the loaded value for a <code>flow.tensor.update</code> (or other operation):</p> <pre><code>%index_tensor = flow.ex.stream.fragment(...) -&gt; tensor&lt;i32&gt; { ... }\n%index = flow.tensor.load %index_tensor : tensor&lt;i32&gt;\n%result = flow.ex.stream.fragment(%arg0 = %index : i32, ...) -&gt; ... {\n  %0 = flow.dispatch ...\n  %1 = flow.tensor.update %0, %arg2[%index] : tensor&lt;10xf32&gt; -&gt; tensor&lt;1x10xf32&gt;\n  ...\n}\n</code></pre> <p>Today the <code>flow.tensor.update</code> turns into HAL command buffer transfer operations that must have their offsets known at recording time. This is a limitation of <code>vkCmdCopyBuffer</code> but not a fundamental limitation of any hardware. In fact several drivers implement copies as small built-in shader programs meaning that we could perform the same expansion here with the right primitives. This would allow, in the above example, both the index to be computed and the tensor to be updated within the same stream to entirely remove the host round-trip.</p>"},{"location":"developers/design-docs/design-roadmap/#threading-flowstream-through-the-cfg","title":"Threading <code>flow.stream</code> through the CFG","text":"<p>The current <code>flow.ex.stream.fragment</code>, as denoted by the <code>ex</code>perimental tag, is a temporary implementation designed to get the concept of streams lowered to the HAL dialect. For streams to be effective at modeling larger concurrency scopes they need to be able to move across branches in the CFG. 
This intuitively follows exactly what one would do if recording commands in C:</p> <pre><code>vkCmdCopyBuffer(cmd, ...);\nif (some_flag) {\n  vkCmdBindPipeline(cmd, ..., pipeline_a);\n} else {\n  vkCmdBindPipeline(cmd, ..., pipeline_b);\n}\nvkCmdDispatch(cmd, ...);\n</code></pre> <p>The corresponding <code>flow</code> IR:</p> <pre><code>  flow.stream.append[%s0](...) {\n    flow.tensor.update ...\n  }\n  %b = arith.cmpi ne %some_flag, ...\n  cond_br %b, ^a(%s0), ^b(%s0)\n^a(%s1):\n  flow.stream.append[%s1](...) {\n    flow.dispatch @pipeline_a, ...\n  }\n  br ^end(%s1)\n^b(%s2):\n  flow.stream.append[%s2](...) {\n    flow.dispatch @pipeline_b, ...\n  }\n  br ^end(%s2)\n^end(%s3):\n  ...\n</code></pre> <p>This allows the entire stream to be lowered into one command buffer without the need for any host round-trips. The conversion into the <code>flow</code> dialect will walk the CFG and attempt to thread the <code>flow.stream</code> values through so long as there are no external dependencies.</p>"},{"location":"developers/design-docs/design-roadmap/#predication-of-flowdispatch","title":"Predication of <code>flow.dispatch</code>","text":"<p>While the <code>flow.stream</code> threading through the CFG can remove many of the simpler conditional dispatches there will always be some that will have their execution dependent on the result of prior dispatches. For these a <code>flow.cond_dispatch</code> will allow a condition to be provided that must be true for the dispatch to actually be performed.</p> <p>For targets that natively support predication in their command buffers (such as D3D12's ID3D12GraphicsCommandList::SetPredication) this provides a host round-trip-free way of conditionally executing dispatches and transfers. 
Unfortunately Vulkan support is still lacking, though Nvidia supports the VK_EXT_conditional_rendering extension that exposes the same behavior.</p> <p>For targets that do not support predication natively it's still possible to emulate predication with indirect dispatches. In this model the workgroup counts normally used to dispatch execution are sourced from another device buffer at the time the dispatch is made instead of sourced from the command buffer at the time the dispatch is recorded. Degenerate dispatches with counts of <code>0, 0, 0</code> allow for effective neutering of the dispatch with minimal overhead (vs. the significant penalty of a host round-trip!).</p> <p>By modeling such predication at the <code>flow</code> level we are able to lower into the HAL with target-aware predication semantics and fuse indirect dispatch workgroup count calculations into existing dispatches already being performed such that overhead is reduced.</p>"},{"location":"developers/design-docs/design-roadmap/#deduping-flowexecutables","title":"Deduping <code>flow.executable</code>s","text":"<p>While still in the <code>flow</code> dialect, the executables are target-agnostic. This makes simple IR tree diffing a potential solution to deduplication. Since most of the dispatches originate from the same source-language library calls in input frameworks there's a high likelihood of duplication, and depending on when inlining is performed we may have stronger or weaker ability to perform the deduplication. 
Thanks to the MLIR canonicalization pass (that ensures ops are rearranged into consistent canonical representations) the IR comparisons can be done rather trivially.</p>"},{"location":"developers/design-docs/design-roadmap/#rematerializing-csed-expressions","title":"Rematerializing CSE'd Expressions","text":"<p>Common subexpression elimination is performed many times during lowering, however there comes a point where the CSE can introduce false dependencies and additional allocations that are otherwise avoidable. For example if a broadcasting operation is CSE'd and then the result is used by two or more operations that are scheduled independently what would have been a relatively cheap lowering of the broadcast to a simple index remapping now becomes an additional dispatch, materialization of an intermediate tensor, and a barrier:</p> <pre><code>%bcast = \"mhlo.broadcast_in_dim\"(%cst) : (tensor&lt;f32&gt;) -&gt; tensor&lt;1024x10xf32&gt;\n%mul1 = mhlo.multiply %arg0, %bcast : tensor&lt;1024x10xf32&gt;\n// (pretend something here that prevents fusion)\n%mul2 = mhlo.multiply %arg1, %bcast : tensor&lt;1024x10xf32&gt;\n</code></pre> <pre><code>%bcast = flow.dispatch.region(%cst : tensor&lt;f32&gt;) -&gt; tensor&lt;1024x10xf32&gt; {\n  %0 = \"mhlo.broadcast_in_dim\"(%cst) : (tensor&lt;f32&gt;) -&gt; tensor&lt;1024x10xf32&gt;\n  return %0 : tensor&lt;1024x10xf32&gt;\n}\n// a barrier will be required here\n%mul1 = flow.dispatch.region(%arg0 : tensor&lt;1024x10xf32&gt;, %bcast : tensor&lt;1024x10xf32&gt;) -&gt; tensor&lt;1024x10xf32&gt; {\n  %1 = mhlo.multiply %arg0, %bcast : tensor&lt;1024x10xf32&gt;\n  return %1 : tensor&lt;1024x10xf32&gt;\n}\n%mul2 = flow.dispatch.region(%arg1 : tensor&lt;1024x10xf32&gt;, %bcast : tensor&lt;1024x10xf32&gt;) -&gt; tensor&lt;1024x10xf32&gt; {\n  %2 = mhlo.multiply %arg1, %bcast : tensor&lt;1024x10xf32&gt;\n  return %2 : tensor&lt;1024x10xf32&gt;\n}\n</code></pre> <p>Instead the broadcast should be rematerialized inside of both dispatch 
regions as the cost of doing so is significantly less in compute resources and then the intermediate tensor will not be required at all. Though at first it may seem counter-intuitive to undo such a critical optimization as CSE (both to code size and often to compute) but here it's something we must carefully balance while looking at the whole system. It gets even more important when considering multi-device execution as the cost of sharing memory and synchronizing may be extremely non-trivial.</p>"},{"location":"developers/design-docs/design-roadmap/#device-placement","title":"Device Placement","text":"<p>While still within the <code>flow</code> dialect we have the ability to easily split streams and safely shuffle around operations. Target execution backends can opt into such behavior to ensure that device restrictions such as maximum in-flight memory, maximum scheduling depth, and capabilities are observed. For heterogeneous configurations the intent is that certain operations, dispatches, and streams can be attributed to specify which device categories they should be lowered. The constraint solving that takes place can be provided with generic heuristics (\"big GEMMs go on the accelerator\"), profile-guided databases based on benchmarks, learned traits via ML, etc.</p>"},{"location":"developers/design-docs/design-roadmap/#hal-hardware-abstraction-layer-and-multi-architecture-executables","title":"<code>hal</code>: Hardware Abstraction Layer and Multi-Architecture Executables","text":"<p>As the IREE HAL is designed almost 1:1 with a compute-only Vulkan API many of the techniques classically used in real-time graphics apply. 
The benefit we have by modeling our usage of such a low-level API in IR is that the normal work - some of which is very non-trivial - for managing allocations, tracking resource lifetime, and ensuring proper synchronization/barriers is something we can apply the full force of an offline compiler against.</p>"},{"location":"developers/design-docs/design-roadmap/#allow-targets-to-specify-halinterfaces","title":"Allow Targets to Specify <code>hal.interface</code>s","text":"<p>The <code>hal.interface</code> op specifies the ABI between the scheduler and the device containing the buffer bindings and additional non-buffer data (parameters, shapes, specialization flags, etc). Today a na\u00efve ordering is used uniformly for all targets however it is possible for target backends to opt into providing their own interfaces based on target configuration. The same <code>hal.executable</code> may have multiple interfaces and the same backend may use one or more. This is useful for when target capabilities may vary at runtime, such as the number of available storage buffer bindings in Vulkan. By exposing a few <code>hal.interface</code> variants with different binding amounts the Vulkan backend could make better use of the larger number of bindings available at runtime while still providing support for smaller configurations.</p> <p>Once we have multiple <code>hal.interface</code>s defined for executables the scheduler needs to emit HAL ops that properly switch between them. By having a canonical form for bindings we can ensure that only the differences between the interfaces will need additional code.</p>"},{"location":"developers/design-docs/design-roadmap/#target-specific-scheduling-specialization","title":"Target-specific Scheduling Specialization","text":"<p>Though the <code>flow</code> dialect attempts to fuse as many ops as possible into dispatch regions, it's not always possible for all target backends to schedule a region as a single dispatch. 
A classic example is algorithms like parallel reduction commonly used on GPUs that may require many dispatches to identical executables, while other algorithms may vary the executables they use based on the input parameters such as shape or the target runtime device support.</p> <p>By default the <code>flow.dispatch</code> executable translation to <code>hal.executable</code>s is performed 1:1 and it is assumed that a single dispatch is required. Extending target backends with scheduling interfaces (enabling them to opt into different scheduling behavior) will allow the backends to emit any number of <code>hal.executable</code>s and any stream commands (such as additional dispatches or transfers) they may need. This is effectively equivalent to what would be done at runtime only because we are still operating on IR prior to buffer allocation and can use the <code>hal</code> ringbuffer primitive. Through this we can elide many of the allocations that would otherwise be required at runtime (and the concurrency-limiting false dependencies that usually come along with scratch memory).</p> <p>Since the algorithm used may vary based on the parameters of the dispatch (such as the shape of the reduction which may be dynamically determined) scheduling specialization may occur even when targeting a single backend. In many cases folding and canonicalization can eliminate the overhead as whether one dynamically computed workgroup size is used instead of another the same IR is present.</p>"},{"location":"developers/design-docs/design-roadmap/#buffer-usage-tracking","title":"Buffer Usage Tracking","text":"<p>Many explicit hardware APIs require knowing how buffers are used alongside with where they should be located. For example this additional information determines caching policy on buffer accesses (write-through, write-back, etc), visibility of writes across compute units, and the possible MMU properties that may need to be maintained/matched for the buffer. 
By using the SSA-form value-semantics of the MLIR <code>tensor</code> as used in the <code>flow</code> dialect we have complete information of where buffers may be used or at least where they enter or leave regions where we can derive such information.</p> <p>Analysis passes can run over IR to attribute tensors such that when allocation is performed when lowering to the <code>hal</code> dialect we do so from an allocator compatible with where the buffer will be used, with memory types chosen based on the potential cost and location of operations performed (write-only on host vs. read-write on host and device, etc), and with usage bits indicating what kind of operations may be performed on the buffer. Many of these are local transformations as most buffers are only live within very small regions such as the <code>flow.stream</code> encompassing their usage.</p> <p>Traditional systems need to either use very permissive buffer properties or heuristics that can introduce additional non-trivial overhead when such heuristics are incorrect. For example, OpenGL had several such usage hints that drivers were then able to use but almost no drivers behaved as desired in all cases and it led to additional memory ghosting, copies, readbacks, and unpredictable performance. For almost all uses of the buffers within an IREE invocation we instead can know precisely where and how buffers may need to be moved and do it a minimum number of times if it is required.</p>"},{"location":"developers/design-docs/design-roadmap/#batched-executable-caching-and-precompilation","title":"Batched Executable Caching and Precompilation","text":"<p>For targets that may require runtime preprocessing of their executables prior to dispatch, such as SPIR-V or MSL, the IREE HAL provides a caching and batch compilation mechanism based on Vulkan's Pipeline Cache.</p> <p>Today each executable is compiled on-demand and cached only for the process lifetime. 
Though some drivers may provide their own caching we can make better use of the explicit caching and compilation behavior with the additional information we have in the compiler.</p> <p>For any given entry point (or group of entry points) into an IREE module we can perform reachability analysis to know which executables may be executed when that entry point is invoked. In this way we can emit pre-invocation compilation checks (similar to an <code>std::call_once</code> block) that provides all required executables for compilation and allows more efficient compilation through multithreading the compiler invocations. This same compilation caching function can be exposed and invoked manually by an application to force pre-compilation when it is least likely to impact the user, such as a post-install/first-run step or concurrently while other application features are loading.</p> <p>We can use zero or more scoped caches for executables within a module. Completely dynamic modules (such as those emitted in eager-mode usage) may avoid the caching overhead entirely, while modules that have several primary usage modes (such as training and inference) may choose to use independent caches for each such mode.</p> <p>The caches generated can then be retrieved and saved by the hosting application. Upon the next execution the application can provide the caches and if still valid they will be used to avoid compilation.</p>"},{"location":"developers/design-docs/design-roadmap/#target-aware-executable-compression","title":"Target-aware Executable Compression","text":"<p>An advantage of representing executable binaries in IR after translation is that we can apply various post-compilation compression and minification techniques while still knowing precisely where the executable will be used. This is extremely important for SPIR-V as it is not designed to be a small at-rest format. 
Though the biggest lever we have to control generated code size is higher-level deduplication and specialization there will still be a sufficiently large number of executable binaries we will need to embed within the final modules and having targeted approaches for reducing their size beyond just \"gzip everything\" is very powerful.</p> <p>For example, SMOL-V is a fantastic lossless SPIR-V compression technique that, when coupled with modern dictionary-based compression algorithms, can save significant binary size. As a data point, the SPIR-V corpus SMOL-V uses for testing goes from 4.8MiB of raw SPIR-V to 348KiB of compressed SMOL-V.</p> <p>Combined with Batched Executable Caching and Precompilation we can easily use shared dictionaries and other cross-artifact compression in a relatively plug-in way.</p>"},{"location":"developers/design-docs/design-roadmap/#target-aware-constant-compression","title":"Target-aware Constant Compression","text":"<p>It's still an area that needs more research but one goal of the IREE design was to enable efficient target- and context-aware compression of large constants (typically model weights/parameters/embeddings). This may mean reusing existing hardware compression formats on GPUs, ML accelerator-specific formats, or very-low-bit-depth (1-4 bit per value) quantization techniques that cannot be directly used without first decompressing. The inspiration here is formats like Crunch and Basis Universal that perform \"supercompression\", and we may even be able to use these directly as then we can make use of GPU hardware samplers to do the 4-bit to 32-bit decompression, etc.</p>"},{"location":"developers/design-docs/design-roadmap/#command-buffer-stateful-deduplication","title":"Command Buffer Stateful Deduplication","text":"<p>The IREE HAL - much like Vulkan it is based on - eschews much of the state that traditional APIs have in favor of (mostly) immutable state objects (pipeline layouts, pipeline states, descriptor sets, etc). 
There are still a few stateful entry points in the API, though, and deduplicating or reordering redundant calls can reduce IR, API, and execution overhead.</p> <p>The key place this will have the largest impact is around descriptor set bindings and push descriptors, both of which are state and can have non-trivial setup overhead. A canonicalization for such commands that inspects the target <code>hal.command_buffer</code> to see if the same state was set prior and code motion to move such commands out of loop bodies when possible would be helpful.</p>"},{"location":"developers/design-docs/design-roadmap/#resource-timeline","title":"Resource Timeline","text":"<p>A core concept of the IREE scheduler that allows for overlapping in-flight invocations is that of the resource timeline. This identifies module state that can be in use by multiple invocations and assigns timeline milestones denoting when the resource will be in the appropriate state for the current invocation to proceed. Conceptually it is like an epoch-based synchronization mechanism as commonly found in garbage collectors to allow for lock-free asynchronous memory reclamation.</p> <p>The advantage we have in the IR is that we know both the usage of all resources thanks to buffer usage tracking and the synchronization domains of all resources (in most cases). This allows us to effectively assign one timeline semaphore per writeable resource while in practice having far fewer than 1:1, as for example if two resources are only ever written in the same command buffer only one semaphore is needed to signal the completion of both writes.</p> <p>By transforming IR to sink all resource reads and writes closest to where the value is used we can enlarge the time windows that can overlap across invocations that may share those resources. 
This is similar to what out-of-order CPUs do with register renaming/reorder buffers/etc and something we can apply some traditional instruction scheduling techniques to (only here our 'instructions' are entire command buffer dispatches/transfers).</p> <p>Two degenerate cases of this approach are that of resource indirection (<code>util.ptr&lt;tensor&lt;T&gt;&gt;</code>) and dynamic resource shapes. In these two cases it may not be possible to continue recording commands even if we are able to ensure execution is appropriately synchronized. This is where indirect dispatch, predication, indirect command buffers, and VM coroutines can all help cover for the times where we are unable to transform away the indirection or emit shape logic without data dependencies.</p>"},{"location":"developers/design-docs/design-roadmap/#transient-tensor-ringbuffer","title":"Transient Tensor Ringbuffer","text":"<p>(When properly implemented) almost all buffers required during execution never escape the command buffers they are used in or a single VM invocation. We can trivially identify this from the explicit captures of <code>flow.stream</code> and <code>flow.dispatch</code> ops and the fact that all tensor types have value-semantics. Only those tensor values loaded-from/stored-to module state or that cross the exported module function boundary need special consideration while almost everything else can live transiently only so long as it is required during execution.</p> <p>Thanks to this information about buffer usage and lifetime we can use a ringbuffer to store the transient tensor data and other required data reservations such as uniform buffers used to pass dynamic parameters (shapes, flags, etc) into dispatches. 
This gives the compiler and the application a knob that allows them to control maximum concurrency (by having a very large ringbuffer) or maximum memory usage (by having a minimally small ringbuffer).</p> <p>Allocating tensors from the ringbuffer does not require sophisticated runtime packing as we can emit IR to calculate required sizes for dynamically shaped tensors. Whether a basic block reserves <code>%sz = arith.constant 42 : index</code> bytes or <code>%sz = std.muli %cst, %dyn_dim : index</code> bytes doesn't materially change how the allocations are performed. Since almost all usage involves simple write head bumps there is no need for ahead-of-time memory planning or large fixed allocations, and since no buffer within the ringbuffer can alias we can have coarse (read: low overhead) guarantees about the availability of certain regions of the ringbuffer (\"when this event is signaled all prior ringbuffer writes have completed\").</p> <p>Usually any planning we may want to perform can be done in IR via code motion. For example applying traditional algorithms used to reduce register pressure will help us attain narrower live windows within the ringbuffer leading to a larger number of in-flight operations for the same ringbuffer memory usage.</p> <p>We may end up using both a classical ringbuffer and a variant known as the bip buffer because it is better for descriptor set utilization (as we can provide many dispatch parameters with a single base offset bound once at the beginning of a region).</p>"},{"location":"developers/design-docs/design-roadmap/#timeline-semaphores-on-the-module-abi","title":"Timeline Semaphores on the Module ABI","text":"<p>Function calls made across modules (either from C++ into the VM, VM-&gt;VM, or VM-&gt;C++) should be able to define timeline semaphores used to wait and signal on the call. 
We can do this by making all exports automatically have the semaphores and then make invocations populate them if they were not provided by the caller. In this way we can allow multiple invocations of exported functions to chain naturally with internal asynchronous workloads, turning most IREE invocations into just recording of command buffers that can never block.</p> <p>When combined with VM coroutine support we even have the ability to interleave any required host execution between the wait and signal semaphores provided such that the caller never knows on which device execution is taking place. It's still possible to provide synchronous wrappers that emulate blocking behavior but by having the core system designed around a single system-supported primitive we avoid the need for additional things like interrupt watchdog threads, implicit blocking, and other pitfalls.</p>"},{"location":"developers/design-docs/design-roadmap/#gpu-like-cpu-scheduling","title":"GPU-like CPU Scheduling","text":"<p>One approach to using multiple cores on a CPU is to perform interior parallelization of operations such as OpenMP or library-call-based custom thread pools (gemmlowp). This works when each individual operation is relatively costly vs. potential pipeline bubbles caused by work spinning down near the end of an operation and spinning up at the beginning of the next.</p> <p>IREE is designed to handle many more workloads - some of which have very narrow shapes but very deep pipelines (like search algorithms) - such that the above approach of multithreading within ops becomes a bottleneck. 
These workloads are traditionally very poorly handled by frameworks and issues with oversubscription, pipeline stalls, and suboptimal system schedulers (such as on Android) can lead to more time being spent thrashing about than actually executing real work.</p> <p>The approach we take here is to treat the cores of a CPU as if they were computation units on a GPU, each able to perform some set of heterogeneous work independent of other units. This means that the concurrency we are trying to model at the <code>flow</code> level and communicate to the runtime via the <code>hal</code> that explicitly states which dispatches can overlap and the size of the workgroups can trivially be used to distribute this work over many cores exactly as a GPU would do it. Integration with library calls that may require their own threading (such as Ruy) requires that they be able to use the IREE thread pool instead of their own.</p> <p>In this way we can avoid pipeline bubbles and other latency-inducing unpredictable scheduling. This does not mean that we treat individual units of work at the same scale as we would for GPUs, but instead that we tile and have one or more processing units that allows us to work on those tiles. Whether the tile size is defined by a library call contract, heuristics, or empirically is TBD, but expect workgroup sizes in the thousands to millions of invocations vs. normal GPU workgroup sizes in the dozens to hundreds of invocations.</p> <p>To achieve this style of scheduling efficiently we'll likely use something like marl as the scheduler. 
Marl provides cross-platform low-overhead fibers and is compatible with this style of scheduling as it was built for the Swiftshader software rasterizer.</p> <p>Even if IREE was only targeting CPUs the assertion is that we would still want to schedule this way and it's only an incidental benefit that if building for heterogeneous targets the scheduling code may be shared (just with a different divisor for workgroup count calculations).</p>"},{"location":"developers/design-docs/design-roadmap/#vm-lightweight-virtual-machine","title":"<code>vm</code>: Lightweight Virtual Machine","text":"<p>The VM is designed as a dynamic linkage ABI, stable bytecode representation, and intermediate lowering IR. Many of the optimizations we can perform on it will benefit all use cases (such as when lowering to LLVM IR) by allowing higher-level program transformations around synchronization that are difficult to perform on arbitrary LLVM IR.</p>"},{"location":"developers/design-docs/design-roadmap/#coroutines-for-batching-and-cooperative-scheduling","title":"Coroutines for Batching and Cooperative Scheduling","text":"<p>One of the largest features currently missing from the VM is coroutines (aka user-mode fiber scheduling). Coroutines are what will allow us to have multiple in-flight invocations into a module - some of which may be waiting on external events - without the need for complex multithreading logic or state machine machinations.</p> <p>In many cases once semaphores are exposed to callers we will not need to yield in the VM. The user will call into the module with provided semaphores, the work to perform will be recorded to one or more command buffers and submitted to the device, and then control will return to the caller immediately.</p> <p>In cases requiring host readbacks that we were not able to remove, however, additional VM code may need to run prior to when the final semaphore is signaled. 
To preserve the asynchronous interface and immediate execution guarantees the compiler can emit explicit yield points (<code>vm.yield</code>) that are known-good locations for yielding (such as most resources not required after the yield having been flushed/discarded, partial synchronization scope availability if other work may be able to execute concurrently irrespective of the yielded coroutine, etc).</p> <p>When the VM encounters the yield at runtime it will suspend the coroutine until a defined condition is met. Many coroutines can be in various states at any given time and - thanks to the resource timeline - can still be memory safe. For example if two stateless invocations are made with a common wait semaphore both can be recorded and submitted without waiting on each other. If there is internal module state accessed the invocations are implicitly ordered by invocation order (similar to what Vulkan calls API order) based on internal resource timeline semaphores.</p> <p>Waking the coroutines can be performed by either an application-provided callback in the case of the application already having a periodic event which is doing bookkeeping (such as frame end callbacks when rendering or Looper idle events on Android), giving direct control over the frequency and location which IREE utilizes to perform additional work. A helper will be provided as well that runs a dedicated IREE thread to do this, but the expectation is that applications can often do a better (and importantly more predictable) job.</p> <p>By utilizing coroutines IREE will have a way to fill traditional pipeline bubbles even with execution from the same module (let alone across modules) in the situation where host readbacks or other logic is required. 
This increases overall throughput and utilization while reducing host wakeups as many coroutines can be processed at once to submit new work to the device queues, though it does not help reduce per-invocation latency.</p> <p>External code such as the HAL implementation or user ops may provide the wait handles used for continuation. For example, the HAL can expose a function that yields and wakes only when one or more timeline semaphores reach their target values:</p> <pre><code>// submit work\nhal.device.yield %semaphore4 &gt;= %sem4_target, %semaphore5 &gt;= %sem5_target\n// continue here, possibly much later in time\n</code></pre>"},{"location":"developers/design-docs/design-roadmap/#cellular-batching","title":"Cellular Batching","text":"<p>Though coroutines help throughput there is a way we've found to reduce latency that's been documented as cellular batching. This same technique has been implemented in prior internal systems and is one of the motivating design goals for IREE's creation. The core idea is to identify small uniform work that can be partitioned and scheduled greedily such as to enable batching or reduce associated invocation costs (such as refreshing accelerator SRAM/caches with new parameters). This usually manifests as finding large GEMM/GEMV operations using the same fixed parameters and either dynamically increasing the batch size by adding the waiting work (without deferring the actual execution time) or sequencing them back to back to ensure better cache utilization. Which approach is taken depends on any data dependencies that may be present (such as LSTM state feedback edges).</p> <p>With the foundation of coroutines in IREE it's possible to yield execution at any given point - including during command buffer recording - and wake on specific conditions. 
A majority of the logic can be built into the module itself with very little need for runtime machinery, as shared VM variables can be used to track pending work across invocations (even from different parts of the program) and flush based on logic wholly controlled by the user or compiler (such as count/max time latency/etc limits). This allows for the large variety of scheduling behavior various applications may want to use, such as a zero-latency batch-only-within-this-invocation to a Nagle's Algorithm-esque time or limit based behavior or even some learned model-specific windowing.</p> <p>Design work is still required on how to represent this in IR but the current thought is to model the regions in which deferred execution is possible and beneficial and allow during lowering to the VM additional transformations. This is similar to how the async-await behavior works in C# where the async keyword is just sugar that expands to additional generated helper utilities.</p> <p>A simple strawman representation for sequential dispatch may look like:</p> <pre><code>hal.scheduling_policy @defer_policy {\n  // max time, max count, max live memory, etc\n}\n...\nhal.command_buffer.dispatch.deferred @defer_policy, @dispatch, ...\n// vm.yield added here during lowering\n</code></pre> <p>There are many cases to explore and as cellular batching can have performance benefits of several orders of magnitude it'll be one of the primary areas of research in the long-term.</p>"},{"location":"developers/design-docs/design-roadmap/#lowering-to-llvm-ir","title":"Lowering to LLVM IR","text":"<p>For scenarios where dynamic module loading is not required and entire modules can be compiled into applications we can lower the VM IR to LLVM IR within MLIR's transformation pipeline. Instead of embedding <code>vm.call</code> ops that are dispatched at runtime to things like the HAL we can instead lower to <code>llvm::CallInst</code> to runtime-resolved function pointers. 
This still enables all of the flexibility of heterogeneous/runtime-determined devices, pluggable diagnostics, and backend composition without any need for FlatBuffers or the VM bytecode interpreter.</p> <p>The VM was designed to make such a lowering easy and the C-style struct-based function pointer registration for runtime modules was designed to make emitting code that used it fairly robust even when linked in dynamically such as when embedded in shared objects.</p> <p>An extension of this is what we've been calling 'runtimeless mode', where the IREE VM linkage code is statically linked into the binary alongside the generated module LLVM IR. If only a single HAL backend is linked in then (with some build-fu) we should be able to get call devirtualization to reduce code size to precisely the functionality used by the module.</p>"},{"location":"developers/design-docs/design-roadmap/#improved-type-support","title":"Improved Type Support","text":"<p>Currently the VM only supports two types: <code>i32</code> and <code>vm.ref&lt;T&gt;</code>. This is an intentional limitation such that we can determine what is really needed to express the scheduling we perform, with the idea being that such a limited model will make it easier to use techniques like indirect command buffers to compile the VM itself to an accelerator executable that dispatches work without host involvement.</p> <p>As we port more models we may find a few primitives that are worth bringing into the VM design such that it's worth potential complications to future porting. 
These include types like <code>f32</code> (for simple float calculations/comparisons), <code>list</code>/<code>dict</code> (easier python compatibility), and <code>vector&lt;4xf32&gt;</code> (for simple inline calculations that are not worth dispatch overhead/synchronization).</p>"},{"location":"developers/design-docs/design-roadmap/#indirect-command-bufferon-accelerator-execution","title":"Indirect Command Buffer/On-Accelerator Execution","text":"<p>Though IREE will use many different tricks such as predication to build deep pipelines there is still the requirement that the command recording and submission happens on the host CPU. Though the cost of this in terms of latency and power use can be minimized by coalescing and timelines there is still the possibility of non-trivial roundtrips being introduced that limit performance. For particular applications like low-power always-on compute or where there is significantly branchy behavior (such as search algorithms) it is important that the decision making logic as to what is dispatched runs as close to real-time as possible within the execution pipeline.</p> <p>The IREE VM is designed to be runnable on-device in a secure and cooperative way (no pointers, indirect buffer handles to allow for memory space rearrangement op-to-op, deterministic execution and explicit yield points, etc).</p> <p>The recent efforts to bring indirect command buffers to Vulkan and Metal's Indirect Command Buffers (that both derive inspiration from NV_command_list) are one such target for this. Either by lowering the VM IR to LLVM IR or SPIR-V, by a special conversion to target-specific forms, or by actually executing the VM bytecode directly on-device (it's ~1000 LoC) we should be able to prototype what full on-device usage is like. 
Even if only some VM functions the compiler deems useful to schedule on the device are used and the rest run on the host (particularly those functions calling imported functions) some of the most costly logic that creates tight coupling of the host and device scheduling can be limited.</p>"},{"location":"developers/design-docs/function-abi/","title":"Function ABI","text":"<p>Note</p> <p>Authored December, 2019</p> <p>Updated August, 2021</p> <p>A key job of the IREE compiler and runtime is capturing function call semantics from the originating system and providing mechanisms so that invocations can be performed in as similar a way as possible in various target languages. In general, this requires additional metadata on top of the raw characteristics of a function. Where possible, this is done by attaching attributes to a function.</p> <ul> <li><code>iree.abi</code> : JSON encoded description of the function's calling convention.</li> </ul>"},{"location":"developers/design-docs/function-abi/#v1-abi","title":"V1 ABI","text":"<p>This is the default ABI supported by the IREE VM invocations. 
It attempts to provide a default calling convention that can be used without further reflection metadata but which may be enhanced with it.</p> <p>It natively allows monomorphic functions to be exported where arguments and results are composed of the following types:</p>"},{"location":"developers/design-docs/function-abi/#value-types","title":"Value Types:","text":"<ul> <li>Byte aligned integer type (i8, i16, i32, i64)</li> <li>Floating point value (f16, f32, f64)</li> </ul>"},{"location":"developers/design-docs/function-abi/#reference-types","title":"Reference Types:","text":"<ul> <li> <p>ND-Array buffers of Value Types:</p> <ul> <li>Simple: Packed, C-layout</li> <li>Strided: Arbitrary layout with strides (future)</li> </ul> </li> <li> <p>String (byte arrays)</p> </li> <li> <p>Opaque reference object</p> </li> </ul>"},{"location":"developers/design-docs/function-abi/#sequence-types","title":"Sequence Types:","text":"<ul> <li>Tuples: fixed length lists where each position has its own type bound</li> <li>Homogenous list: lists of arbitrary size where a single type bound applies     to all elements</li> </ul> <p>The intent with these low level types is that calling conventions can be synthesized to bind arbitrary high level, domain/language specific signatures to these types, possibly by way of additional reflection metadata.</p>"},{"location":"developers/design-docs/function-abi/#representations","title":"Representations:","text":"<p>The above are all representable with native constructs in the VM:</p> <ul> <li> <p>ValueType:</p> <ul> <li>Runtime:     <code>iree_vm_value</code></li> <li>Compile Time: primitive MLIR integer/floating point types</li> </ul> </li> <li> <p>Simple ND-Array Buffer:</p> <ul> <li>Runtime:     <code>iree_hal_buffer_view</code></li> <li>Compile Time: <code>tensor&lt;&gt;</code></li> </ul> </li> <li> <p>String:</p> <ul> <li>Runtime:     <code>iree_vm_list</code>     containing <code>i8</code></li> <li>Compile Time: 
<code>!util.list&lt;i8&gt;</code></li> </ul> </li> <li> <p>Tuple:</p> <ul> <li>Runtime:     <code>iree_vm_list</code>     of variant</li> <li>Compile Time: <code>!util.list&lt;?&gt;</code></li> <li>Note that these are statically type erased at the boundary.</li> </ul> </li> <li> <p>TypedList (homogenous):</p> <ul> <li>Runtime:     <code>iree_vm_list</code>     of <code>T</code></li> <li>Compile Time: <code>!util.list&lt;T&gt;</code></li> </ul> </li> </ul>"},{"location":"developers/design-docs/function-abi/#extended-type-calling-conventions","title":"Extended Type Calling Conventions","text":"<p>While the above features of the native ABI may be sufficient for direct use by various programs, many programs and callers will need to represent various higher level types, consistently mapping them to the above facilities. This section describes calling conventions for various higher level types which do not map 1:1 to the above. Not all source language types are representable, and extending these calling conventions (and the fundamental types above) is demand driven.</p> <p>All of these calling conventions presume that the arity of the arguments/results of the raw function matches the user-level function, meaning that the calling convention is specified per argument/result. Higher-level whole function transformations may also exist for some domains but are outside of the scope of this specification.</p>"},{"location":"developers/design-docs/function-abi/#structure","title":"Structure","text":"<p>A <code>Structure</code> is a common enough entity to have a dedicated calling convention. In C-like languages, this may just be a <code>struct</code>. In Python, it is typically a <code>dict</code> with an associated schema providing a name and type bound for each of its slots. In both, its slots are of fixed arity.</p> <p>In this convention, such a structure is represented as a <code>Tuple</code> in the native calling convention (i.e. <code>!util.list</code> of variant type). 
The order of the elements of the tuple are the natural order of the structure, where that is either:</p> <ul> <li>For a C-like system where order is determinate, it is the order of     declaration.</li> <li>For a name-based system (i.e. bind to <code>dict</code>) where no order is defined, the     natural order will be the lexically sorted order of the keys.</li> </ul>"},{"location":"developers/design-docs/function-abi/#string","title":"String","text":"<p>Most languages interop between byte arrays (i.e. the native ABI <code>String</code> type) by way of applying an encoding. Such strings are just a sequence of bytes (i.e. <code>!util.list&lt;i8&gt;</code>).</p>"},{"location":"developers/design-docs/function-abi/#typed-list","title":"Typed List","text":"<p>High level lists which all share the same type bound are represented as a <code>TypedList</code> in the native ABI.</p>"},{"location":"developers/design-docs/function-abi/#ndarray-of-reference-types","title":"NDArray of Reference Types","text":"<p>NDArrays of reference types are considered separately from those of value types. Internally, the code generated for them is completely different from what gets generated for numeric based arrays (i.e. has ref-counting, ownership semantics, non-POD, etc). These types are permitted for completeness, not necessarily performance: by nature they are already indirected and have overheads.</p> <p>In the native ABI, these are represented as a composite tuple type (i.e. today a list since sugar for tuple is not yet defined): <code>!iree.tuple&lt;!util.list&lt;T&gt;, !util.list&lt;index&gt;&gt;</code>. The first element of the tuple is the list of values, packed with a C-Layout and the second element is the list of dimension sizes.</p>"},{"location":"developers/design-docs/function-abi/#reflection","title":"Reflection","text":"<p>Additional reflection metadata may be encoded in a custom JSON form, providing additional typing hints for arguments and results. 
If present, this will be a reflection attribute with key <code>d</code>, containing a serialized JSON object.</p> <p>The JSON object contains:</p> <ul> <li><code>a</code> (array): List of type records for each argument.</li> <li><code>r</code> (array): List of type records for each result.</li> </ul> <p>Type records are one of:</p> <ul> <li> <p>A string naming a primitive type:</p> <ul> <li><code>i[0-9]+</code>: Integer type with given bit width</li> <li><code>f[0-9]+</code>: IEEE floating point type with given bit width</li> <li><code>bf16</code>: BFloat16</li> </ul> </li> <li> <p>JSON <code>null</code>: A null reference value</p> </li> <li> <p><code>\"unknown\"</code>: An unknown/unmapped type</p> </li> <li> <p>An array, interpreted as a tuple describing a compound type.</p> </li> </ul>"},{"location":"developers/design-docs/function-abi/#compound-type-tuples","title":"Compound type tuples","text":"<p>A compound type tuple has a type identifier as its first element, followed with type specific fields:</p> <ul> <li><code>[\"named\", \"key\", {slot_type}]</code>: Associates a name with a slot. This is     used with the root argument list to denote named arguments that can be     passed positionally or by keyword.</li> <li><code>[\"ndarray\", {element_type}, {rank}, {dim...}]</code>: For unknown rank, the     <code>rank</code> will be <code>null</code> and there will be no dims. Any unknown dim will be     <code>null</code>.</li> <li><code>[\"slist\", {slot_type...}]</code>: An anonymous structured list of fixed arity and     slot specific types. If there are gaps in the list, empty slots will have a     <code>null</code> type.</li> <li><code>[\"stuple\", {slot_type...}]</code>: Same as <code>slist</code> but some languages     differentiate between sequences represented as lists and those represented     as tuples (read-only lists).</li> <li><code>[\"sdict\", [\"key\", {slot_type}]...]</code>: An anonymous structure with named     slots. 
Note that when passing these types, the keys are not passed to the     function (only the slot values).</li> <li><code>[\"py_homogeneous_list\", {element_type}]</code>: A Python list of unknown size     with elements sharing a common type bound given by <code>element_type</code>.</li> </ul>"},{"location":"developers/design-docs/hip-hal-driver/","title":"HIP HAL driver","text":"<p>This document lists technical details regarding the HIP implementation of IREE's Hardware Abstraction Layer, called a HIP HAL driver.</p> <p>IREE provides a Hardware Abstraction Layer (HAL) as a common interface to different compute accelerators. IREE HAL's design draws inspiration from modern GPU architecture and APIs; so implementing a HAL driver using HIP is mostly straightforward; though there are places we need emulation given no direct mapping concepts or mechanisms. HIP HAL driver draws inspiration from the CUDA HAL driver and the code structure is based off of that implementation.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#overall-design-choices","title":"Overall design choices","text":"","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#hip-driver-vs-runtime-api","title":"HIP driver vs runtime API","text":"<p>IREE HAL's design draws inspiration from modern GPU APIs--it provides explicit control of low-level GPU objects. The compiler is expected to plan the object lifetime and schedule workload and synchronization in an optimized way; IREE HAL implementation and the underlying GPU driver stack is expected to be a thin layer without much smarts and magic.</p> <p>Unlike CUDA, HIP doesn't provide two separate API's with the same functionality in the name of driver and runtime. 
Instead it extends the HIP API with Modules and Ctx control API's that the CUDA driver API's exclusively offer. At runtime the HIP HAL driver will load the <code>libamdhip64.so</code>/<code>amdhip64.dll</code> library dynamically.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#gpu-objects","title":"GPU Objects","text":"","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#driver","title":"Driver","text":"<p>There is no direct HIP construct that maps to the IREE HAL <code>iree_hal_driver_t</code> abstraction. We use it to hold the dynamic symbols loaded for all devices, and device enumeration and creation.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#device","title":"Device","text":"<p><code>iree_hal_hip_device_t</code> implements <code>iree_hal_device_t</code> to provide the interface to HIP GPU device by wrapping a <code>hipDevice_t</code>. For each device, right now we create two <code>hipStream_t</code>s--one for issuing commands for memory allocation and kernel launches as instructed by the program; the other for issuing host callback functions after dispatched command buffers complete. 
See synchronization section regarding the details.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#async-allocation","title":"Async allocation","text":"<p>The HIP HAL drivers supports async allocation (<code>iree_hal_device_queue_alloca()</code> and <code>iree_hal_device_queue_dealloca()</code>) via HIP stream ordered memory allocation.</p> <p>The <code>async_allocations</code> in the <code>iree_hal_hip_device_params_t</code> struct allows to enable this feature.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#command-buffer","title":"Command buffer","text":"<p><code>iree_hal_command_buffer_t</code> is a recording of commands to issue to the GPU; when the command buffer is submitted to the device it's then actually executed on the GPU asynchronously.</p> <p>Two implementations of <code>iree_hal_command_buffer_t</code> exist in the HIP HAL driver--one backed by <code>hipGraph_t</code> and the other backed by <code>hipStream_t</code>.</p> <p><code>hipGraph_t</code> conceptually matches <code>iree_hal_command_buffer_t</code> better given it's a recording of commands to issue to the GPU. Also using the <code>hipGraph_t</code> API allows to easily encode fine grain dependencies between dispatch without having to create multiple streams. Therefore, the <code>hipGraph_t</code>-backed implementation is a more natural one. Though note that <code>hipGraph_t</code> API is meant to be used for recording once and replaying multiple times and there may be a performance penalty to using <code>hipGraph_t</code> API for one-shot command buffer.</p> <p>The <code>hipStream_t</code>-backed implementation just issues commands directly to a <code>hipStream_t</code> when recording. Commands issued to <code>hipStream_t</code> can be immediately sent to the GPU for execution; there is no recording and replaying separation. 
In order to match the recording semantics of <code>iree_hal_command_buffer_t</code>, to use the <code>hipStream_t</code>-backed command buffer, we need to first record the command buffer into an in-memory <code>iree_hal_deferred_command_buffer_t</code>, and then when applying the command buffer, we create a new <code>hipStream_t</code>-backed implementation.</p> <p>The <code>command_buffer_mode</code> in the <code>iree_hal_hip_device_params_t</code> struct allows selecting which implementation to use.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#allocator","title":"Allocator","text":"<p>The allocator will forward allocation requests to <code>hipHostMalloc()</code> for host local memory, <code>hipMalloc()</code> for device local and host invisible memory, and <code>hipMallocManaged()</code> for device local and host visible memory.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#buffer","title":"Buffer","text":"<p>HIP buffers are represented either as a host pointer or a device pointer of type <code>hipDeviceptr_t</code>.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#executable","title":"Executable","text":"<p><code>iree_hal_executable_t</code> maps naturally to <code>hipModule_t</code>.</p> <p>The compiler generates a FlatBuffer containing a HSACO image as well as a list of entry point functions and their associated metadata (names, workgroup size, dynamic shared memory size, etc.). 
At runtime, the HIP HAL driver loads the HSACO image and creates <code>hipFunction_t</code>s out of it for various entry points.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#synchronization","title":"Synchronization","text":"","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#event","title":"Event","text":"<p><code>iree_hal_event_t</code> right now is not used in the compiler so it's not yet implemented in the HIP HAL driver.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#semaphore","title":"Semaphore","text":"<p>The IREE HAL uses semaphores to synchronize work between host CPU threads and device GPU streams. It's a unified primitive that covers all directions--host to host, host to device, device to host, and device to device, and allows flexible signal and wait ordering--signal before wait, or wait before signal. There is no limit on the number of waits of the same value too.</p> <p>The core state of a HAL semaphore consists of a monotonically increasing 64-bit integer value, which forms a timeline--signaling the semaphore to a larger value advances the timeline and unblocks work waiting on some earlier values. The semantics closely mirrors Vulkan timeline semaphore.</p> <p>In HIP, there is no direct equivalent primitives providing all the capabilities needed by the HAL semaphore abstraction. Therefore, to implement the support, we need to leverage multiple native CPU or HIP primitives under the hood.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#hipevent_t-capabilities","title":"<code>hipEvent_t</code> capabilities","text":"<p>The main synchronization mechanism is HIP event--<code>hipEvent_t</code>. 
As a functionality and integration baseline, we use <code>hipEvent_t</code> to implement the IREE HAL semaphore abstraction.</p> <p><code>hipEvent_t</code> natively supports the following capabilities:</p> <ul> <li>State: binary; either unsignaled or signaled. There can exist multiple   waits (e.g., via <code>hipEventSynchronize()</code> or <code>hipGraphAddEventWaitNode()</code>) for   the same <code>hipEvent_t</code> signal (e.g., via <code>hipEventRecord()</code> or   <code>hipGraphAddEventRecordNode()</code>).</li> <li>Ordering: must be signal before wait. Waiting before signal would mean   waiting an empty set of work, or previously recorded work.</li> <li>Direction: device to device, device to host.</li> </ul> <p>We need to fill the remaining capability gaps. Before going into details, the overall approach would be to:</p> <ul> <li>State: we need a 64-bit integer value timeline. Given the binary state of   a <code>hipEvent_t</code>, each <code>hipEvent_t</code> would just be a \"timepoint\" on the timeline.</li> <li>Ordering: we need to defer releasing the workload to the GPU until the   semaphore waits are reached on the host, or we can have some device   <code>hipEvent_t</code> to wait on.</li> <li>Direction: host to host and host to device is missing; we can support that   with host synchronization mechanisms.</li> </ul>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#signal-to-wait-analysis","title":"Signal to wait analysis","text":"<p>Concretely, for a given HAL semaphore, looking at the four directions:</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#cpu-signal","title":"CPU signal","text":"<p>A CPU thread signals the semaphore timeline to a new value.</p> <p>If there are CPU waits, it is purely on the CPU side. We just need to use common CPU notification mechanisms. In IREE we have <code>iree_event_t</code> wrapping various low-level OS primitives for it. 
So we can just use that to represent a wait timepoint. We need to keep track of all CPU wait timepoints in the timeline. After a new signaled value, go through the timeline and notify all those waiting on earlier values.</p> <p>If there are GPU waits, given that there is no way we can signal a <code>hipEvent_t</code> on CPU, one way to handle this is to cache and defer the submission batches by ourselves until CPU signals past the desired value. To support this, we would need to implement a deferred/pending actions queue.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#gpu-signal","title":"GPU signal","text":"<p>GPU signals can only be through a <code>hipEvent_t</code> object, which has a binary state. We need to advance the timeline too. One way is to use <code>hipLaunchHostFunc()</code> to advance from the CPU side with <code>iree_hal_semaphore_list_signal()</code>. This additionally would mean we can reuse the logic from CPU signaling to unblock CPU waits.</p> <p>After advancing the timeline from the CPU side with <code>hipLaunchHostFunc()</code>, we can release more workload from the deferred/pending actions queue to the GPU. Though, per the documentation of <code>hipLaunchHostFunc()</code>, \"the host function must not make any HIP API calls.\" So we cannot do that directly inside <code>hipLaunchHostFunc()</code>; we need to notify another separate thread to call HIP APIs to push more work to the GPU. So the deferred/pending action queue should have an associated thread.</p> <p>For GPU waits, we can also leverage the same logic--using CPU signaling to unblock deferred GPU queue actions. Though this is not performant, given that the CPU is involved for GPU internal synchronization. We want to use <code>hipEvent_t</code> instead:</p> <ul> <li>We keep track of all GPU signals in the timeline. 
Once we see a GPU wait   request, try to scan the timeline to find a GPU signal that advances the   timeline past the desired value, and use that for waiting instead. (This   actually applies to CPU waits too, and it's an optimization over pure   CPU side <code>iree_event_t</code> polling.)</li> <li>We may not see GPU signal before seeing GPU wait requests, then we can also   keep track of all GPU waits in the timeline. Later once see either a CPU   signal or GPU signal advancing past the waited value, we can handle them   accordingly--submitting immediately or associating the <code>hipEvent_t</code>.   This would also guarantee the requirement of <code>hipEvent_t</code>--recording should   happen before waiting.</li> <li>We can use the same <code>hipEvent_t</code> to unblock multiple GPU waits. That's allowed,   though it would mean we need to be careful regarding <code>hipEvent_t</code> lifetime   management. Here we can use reference counting to see how many timepoints   are using it and automatically return to a pool once done.</li> </ul> <p>Another problem is that per the <code>hipLaunchHostFunc()</code> doc, \"the function will be called after currently enqueued work and will block work added after it.\" We don't want the blocking behavior involving host. So we can use a dedicated <code>hipStream_t</code> for launching the host function, waiting on the <code>hipEvent_t</code> from the original stream too. We can also handle resource deallocation together there.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#data-structures","title":"Data structures","text":"<p>To summarize, we need the following data structures to implement HAL semaphore:</p> <ul> <li><code>iree_event_t</code>: CPU notification mechanism wrapping low-level OS primitives.   
Used by host wait timepoints.</li> <li><code>iree_event_pool_t</code>: a pool for CPU <code>iree_event_t</code> objects to recycle.</li> <li><code>iree_hal_hip_event_t</code>: GPU notification mechanism wrapping a <code>hipEvent_t</code> and   a reference count. Used by device signal and wait timepoints. Associates with   a <code>iree_hal_hip_event_pool_t</code> pool--returns to the pool directly once the   reference count goes to 0.</li> <li><code>iree_hal_hip_event_pool_t</code>: a pool for GPU <code>iree_hal_hip_event_t</code> objects   to recycle.</li> <li><code>iree_hal_hip_timepoint_t</code>: an object that wraps a CPU <code>iree_event_t</code> or   GPU <code>iree_hal_hip_event_t</code> to represent wait/signal of a timepoint on a   timeline.</li> <li><code>iree_hal_hip_timepoint_pool_t</code>: a pool for <code>iree_hal_hip_timepoint_t</code>   objects to recycle. This pool builds upon the CPU and GPU event pool--it   acquires CPU/GPU event objects there.</li> <li><code>iree_hal_hip_timeline_semaphore_t</code>: contains a list of CPU wait and GPU   wait/signal timepoints.</li> <li><code>iree_hal_hip_queue_action_t</code>: a pending queue action (kernel launch or   stream-ordered allocation).</li> <li><code>iree_hal_hip_pending_queue_actions_t</code>: a data structure to manage pending   queue actions. It provides APIs to enqueue actions, and advance the queue on   demand--queue actions are released to the GPU when all their wait semaphores   are signaled past the desired value, or we can have a <code>hipEvent_t</code> object already   recorded to some <code>hipStream_t</code> to wait on.</li> </ul>","tags":["GPU","HIP"]},{"location":"developers/design-docs/invocation-execution-model/","title":"Invocation execution model","text":"<p>Authored June, 2022</p> <p>This documents the behavior of the user-visible invocation mechanism IREE uses to schedule program execution. 
Internally IREE uses a very similar modeling for tracking its internal workloads and in kind carries that down to target APIs and devices that themselves use a very similar model. The intent is to expose the device model in an abstracted way that allows for the full capture and communication of the execution intent to be propagated to the hardware that executes it. Though here we focus on the user-visible portion of execution there is really only one \"IREE execution model\" and the entire stack follows the same design. At its core this design is just an instantiation of an out-of-order execution algorithm such as those originating from the 1960's.</p>"},{"location":"developers/design-docs/invocation-execution-model/#glossary","title":"Glossary","text":"<pre><code>stateDiagram\n    state UserApplication {\n      direction BT\n      state Context0 {\n        ModuleA--&gt;ModuleAState0\n        ModuleB--&gt;ModuleBState0\n      }\n      state Context1 {\n        ModuleA--&gt;ModuleAState1\n        ModuleB--&gt;ModuleBState1\n        ModuleC--&gt;ModuleCState1\n      }\n      state ModuleA {\n        @func1\n        @func2\n      }\n      state ModuleB {\n        @func3\n        @func4\n      }\n      state ModuleC {\n        @func5\n      }\n    }</code></pre>"},{"location":"developers/design-docs/invocation-execution-model/#program","title":"Program","text":"<p>An IREE program is a collection of modules instantiated in a context from which invocations can be made. Invocations are ordered on a user-controlled timeline that uses fences to define the execution order requirements to enable out-of-order execution. 
A hosting user application may have multiple programs or multiple instances of the same program available and running invocations at a time across multiple timelines.</p>"},{"location":"developers/design-docs/invocation-execution-model/#module","title":"Module","text":"<p>Modules define executable code and data that can be loaded, linked, and run \u00e0 la ELF shared libraries. Modules may be implemented as C/C++, generated bytecode or C sources from the IREE compiler, or any other mechanism that can run code and implement the <code>iree_vm_module_t</code> interface. Modules on their own are read-only and can be reused across many contexts.</p> <p>Traditional ML runtimes would use a model (graph, etc) as their module representation. In IREE everything is a module including runtime subsystems like the HAL and user-provided custom code. This ensures that anything IREE can do can be externalized and replaced by users without needing to modify the core IREE code.</p>"},{"location":"developers/design-docs/invocation-execution-model/#context","title":"Context","text":"<p>A collection of modules are linked and instantiated in a context. Each context operates independently and carries its own copies of mutable module state. Invocations execute within a context scope and hosting applications coordinate across contexts as required. Contexts are cheap to create (microseconds) and retain (~100B + program state) such that users can decide how to manage them based on their scenario.</p> <p>Traditional ML runtimes would call these \"sessions\" but in IREE everything is a program. Whether the program is stateful or stateless and how the program is invoked is up to the program author.</p>"},{"location":"developers/design-docs/invocation-execution-model/#invocation","title":"Invocation","text":"<p>An invocation represents a single call into a module exported function using the program state stored in a context. 
Users can decide whether to perform synchronous blocking invocations or asynchronous non-blocking invocations per-call; the behavior of the invocation is independent from the target function and a user program may contain a mix of both.</p> <p>As an example a user program may synchronously invoke a <code>@query_output_shapes</code> function to preallocate storage for an asynchronous <code>@execute_in_place</code> function to write into.</p>"},{"location":"developers/design-docs/invocation-execution-model/#timeline","title":"Timeline","text":"<p>A timeline represents the observable order of execution. Users define their own timelines and communicate them to IREE via fences. Timelines do not match up with the order of invocations unless the user dictates they must by way of fences. In the absence of fences all invocations execute in an arbitrary order and they may execute concurrently just as threads in C with no barriers.</p> <p>Each timeline can be thought of as an independent clock domain that may operate asynchronously at its own frequency with only fences acting to tie separate timelines together. This directly mirrors real hardware constraints like clock domain crossing as each execution scope (thread on core, driver calls to queues, kernel queues to device queues, device queues to compute unit queues, etc) is naturally operating at different rates and well-designed systems must tolerate that variability.</p>"},{"location":"developers/design-docs/invocation-execution-model/#fence","title":"Fence","text":"<p>A fence is a specific point of progress in one or more timelines acting as a barrier, fork, or join point. Fences only guard execution ordering and not any particular resources though users can use them to guard resources by defining when in time the resources are available for use.</p> <p>Waits on fences are wait-until operations specifying that the timeline must reach  at least a specific point. 
This allows for flexible reordering and deferral of execution as executors can pull forward scheduled work based on policy (run similar work together, etc).</p>"},{"location":"developers/design-docs/invocation-execution-model/#hardware-abstraction-layer-hal","title":"Hardware Abstraction Layer (HAL)","text":"<p>The HAL is an optional feature of IREE that is used to provide a consistent interface across execution resources. It is used internally by IREE programs to define and submit work to devices and signal across them but may also be used by users to directly interface with hardware in a compatible way. Exposing the HAL API allows for users to efficiently manage their data and custom execution without expensive marshaling. Most users will only interact with HAL buffers as they work with their data but more advanced integrations can directly insert IREE into existing device contexts to transparently share scheduling and resources or insert their own code into IREE to pipeline custom execution.</p>"},{"location":"developers/design-docs/invocation-execution-model/#execution-by-timelines","title":"Execution by Timelines","text":"<p>NOTE: this defines an execution scheme that IREE supports but a user may use one or more such schemes in a single program - just as a C application may mix single- and multi-threaded code within itself for different components.</p> <p>The combination of invocations, timelines, and fences allows users to provide future knowledge to lower layers of the system by declaring their availability requirements and the lower layers are then able to execute the work out-of-order so long as the specified requirements are met. The primary goal when designing for such a system is to specify as few requirements as possible in order to provide the maximum amount of scheduling freedom to the implementation.</p> <p>This makes timelines one of the most critical components of the interface. 
The purpose of invocations is to schedule work against one or more timelines and what happens within the invocations is an implementation detail of the program.</p>"},{"location":"developers/design-docs/invocation-execution-model/#sequential-execution","title":"Sequential Execution","text":"<p>Here we say \"a user invokes a function to schedule execution on a timeline\" vs. a more traditional \"a user invokes a function to execute work\" and this manifests in the IREE ABI as invocations taking fences defining specific points on timelines of which the user may observe:</p> <pre><code># Fences are effectively just timeline + integer tuples and are cheap to hold.\nwait_fence = my_timeline.at(t)\nsignal_fence = my_timeline.at(t+1)\n# Schedule work against the timeline.\n# All work prior to t must complete before execution can occur and after\n# execution the timeline will advance to t+1.\nasync_invoke(@some_fn, wait_fence, signal_fence)\n# The invocation may have returned immediately after the work was scheduled;\n# until the fence is reached no actual execution may have occurred. To\n# synchronize the user code with the timeline the user can block until the fence\n# is reached.\nsignal_fence.wait()\n</code></pre> <p>To the user this would appear as:</p> <pre><code>sequenceDiagram\n    User-&gt;&gt;@some_func: invoke\n    activate @some_func\n    @some_func-&gt;&gt;User: ;\n    @some_func--&gt;&gt;@some_func: wait t\n    @some_func--&gt;&gt;User: signal t+1\n    deactivate @some_func</code></pre> <p>This means from the user's perspective the actual operations performed by the invocation are not important: the only thing the user can observe in this situation is when the timeline reaches <code>t+1</code> as they specified. Whether internally the invocation needs many steps to complete as there are timelines internal to the program is an implementation detail. 
Actual execution may look like this:</p> <pre><code>sequenceDiagram\n    User-&gt;&gt;@some_func: invoke\n    activate @some_func\n    @some_func-&gt;&gt;User:  ;\n    @some_func-&gt;&gt;@some_func: ;\n    @some_func--&gt;&gt;Device A: ;\n    Device A--&gt;&gt;Device A: wait t\n    activate Device A\n    @some_func-&gt;&gt;@some_func: ;\n    @some_func--&gt;&gt;Device B: ;\n    activate Device B\n    @some_func-&gt;&gt;@some_func: ;\n    Device A--&gt;&gt;@some_func: ;\n    deactivate Device A\n    @some_func-&gt;&gt;@some_func: ;\n    @some_func--&gt;&gt;Device B: ;\n    activate Device B\n    deactivate @some_func\n    Device B--&gt;&gt;User: signal t+1\n    deactivate Device B\n    deactivate Device B</code></pre> <p>Even in this simple user-synchronous example the system is able to internally run several concurrent timelines with a minimal number of synchronization points and the lowest possible latency as the user is immediately notified without any intermediate layers needing to be woken, scheduled, executed, and passed on.</p>"},{"location":"developers/design-docs/invocation-execution-model/#pipelined-execution","title":"Pipelined Execution","text":"<p>The true power of timelines comes from the ability to pipeline execution. 
Users define DAGs with fences and can construct arbitrarily complex execution topologies whether from the same program or across multiple programs:</p> <pre><code>stateDiagram\n    direction LR\n    state fence0 &lt;&lt;fork&gt;&gt;\n    [*] --&gt; fence0\n    fence0 --&gt; @fn0\n    state fence1 &lt;&lt;fork&gt;&gt;\n    @fn0 --&gt; fence1\n    fence1 --&gt; @fn1\n    fence1 --&gt; @fn2\n    state fence2 &lt;&lt;join&gt;&gt;\n    @fn1 --&gt; fence2\n    @fn2 --&gt; fence2\n    @fn3 --&gt; fence2\n    fence0 --&gt; @fn4\n    @fn4 --&gt; fence2\n    fence2 --&gt; [*]</code></pre> <p>This is a simple extension to the synchronous example using the same primitives:</p> <pre><code># Timeline is defined by the user.\nfence_a = my_timeline.at(t)\nfence_b = my_timeline.at(t+1)\nfence_c = my_timeline.at(t+2)\n# Invocations are launched using the fences and may not complete immediately.\nasync_invoke(@fn0, fence_a, fence_b)\nasync_invoke(@fn1, fence_b, fence_c)\nasync_invoke(@fn2, fence_b, fence_c)\nasync_invoke(@fn3, None, fence_c)\nasync_invoke(@fn4, fence_a, fence_c)\n# Blocking here but no need to; could pass fence_c on to other invocations.\nfence_c.wait()\n</code></pre> <p>The critical point of this being that the user never had to wait for any particular invocation to complete before being able to schedule more work against the timeline, even if those invocations could themselves not complete synchronously. The lower layers of the system are able to fully model the execution as early as possible without needing to communicate (and importantly synchronize) with the user.</p>"},{"location":"developers/design-docs/invocation-execution-model/#io","title":"I/O","text":"<p>Users define the semantics of their programs themselves. For example if the user knows the precise shape of an output buffer they can preallocate the buffer and pass it in. 
If they don't know they can decide to factor out the shape calculation and invoke that synchronously in order to compute the shape, allocate the appropriately sized buffer, and pass that in. Or they could decide to only deal with synchronous invocations and return a program-allocated buffer view with the appropriate shape in their callback. IREE does not dictate the design of user programs and as such enables mixed stateful/stateless, asynchronous/synchronous, and arbitrary scheduling models (enqueue/drain, windowing, etc).</p> <p>Inputs and outputs to invocations are provided by the user as primitive values (integers, floats, etc), supported builtin types (lists, byte buffers/strings), custom user types, and HAL types like buffers or buffer views (buffers + shape and type metadata). One or more wait fences can be used to order invocation access to one or more inputs by indicating that the resource is not available until a certain fence is reached. Similarly one or more signal fences can be used to order subsequent access to the resources by indicating the advancement of the timeline when they are available.</p> <pre><code># wait_fence_a must be reached before buffer_a and buffer_b can be read.\n# wait_fence_b must be reached before buffer_c can be read.\n# buffer_a will be ready to read when signal_fence_a has been reached.\nasync_invoke(@fn,\n             (wait_fence_a, buffer_a, buffer_b),\n             42,  # no ordering required on value types\n             (wait_fence_b, buffer_c),\n             (signal_fence_a, buffer_a))\n</code></pre> <p>The above example demonstrates an in-place operation on <code>buffer_a</code>. 
It's also possible for invocations to return values:</p> <pre><code>result = invoke(@sum, 1, 2)  # = 3\n</code></pre> <p>When executed asynchronously a callback or any construct that can be built upon them (like promises/futures) can receive the results:</p> <pre><code>def my_callback(result):\n  print(result)  # 3\nasync_invoke(@sum, 1, 2, my_callback)\n</code></pre>"},{"location":"developers/design-docs/invocation-execution-model/#stream-ordered-allocations","title":"Stream-ordered Allocations","text":"<p>Invocations generally have only a few KB of overhead and pipelined command buffers take only a small amount more. Storage buffers, however, can easily take hundreds of MB per invocation for I/O and transient state. This compounds as program usage becomes more complex or multiple programs are involved. IREE supports traditional host-ordered allocations (\u00e0 la malloc/free) for persistent buffers like large constants/read-only data or user-managed ringbuffers. Stream-ordered allocations are also supported to allow for pooled buffer reservations that can be allocated in a scheduled order alongside program execution.</p> <p>For more detailed examples see the CUDA blog posts describing their implementation: part 1, part 2.</p> <p>With stream-ordered allocations each allocation and deallocation operation is scheduled with wait and signal fences just as with invocations. This allows these allocation operations to execute remotely on device without host program involvement. For example, scheduling <code>alloca0</code>/<code>dealloca0</code> and <code>alloca1</code>/<code>dealloca1</code> interleaved with the function execution allows for the transient memory required for executing <code>@fn0</code> to remain uncommitted until immediately before it is executed, committed during execution, and then decommitted immediately after execution. 
The memory required for passing data from <code>@fn0</code> to the subsequent <code>@fn1</code> and <code>@fn2</code> survives until after they have completed executing before being decommitted. By using the same scheduling primitives as execution the allocation topology can be as arbitrarily complex as the invocation topology:</p> <pre><code>stateDiagram\n    direction LR\n    state fence0a &lt;&lt;fork&gt;&gt;\n    [*] --&gt; fence0a\n    state fence0b &lt;&lt;fork&gt;&gt;\n    fence0a --&gt; alloca0\n    fence0a --&gt; alloca1\n    alloca0 --&gt; fence0b\n    alloca1 --&gt; fence0b\n    fence0b --&gt; @fn0\n    state fence1a &lt;&lt;fork&gt;&gt;\n    @fn0 --&gt; fence1a\n    state fence1b &lt;&lt;fork&gt;&gt;\n    fence1a --&gt; dealloc0\n    dealloc0 --&gt; fence1b\n    fence1b --&gt; @fn1\n    fence1b --&gt; @fn2\n    state fence2a &lt;&lt;join&gt;&gt;\n    @fn1 --&gt; fence2a\n    @fn2 --&gt; fence2a\n    state fence2b\n    fence2a --&gt; dealloc1\n    state fence2b &lt;&lt;join&gt;&gt;\n    dealloc1 --&gt; fence2b\n    fence2b --&gt; [*]</code></pre> <p>When operating in this way allocations from the host-perspective are just reservations for a slice of pooled storage that will be committed at some point in the future. Likewise deallocations from the host-perspective release the prior reservation and schedule the paired decommit at some point in the future. Scheduling N sequential invocations thus requires only enough committed storage for a single invocation in addition to the I/O (unless that too is stream-ordered).</p> <p>This scheduling behavior allows for both minimal peak memory consumption regardless of the number of programs or invocation pipeline depth and sharing of committed storage across programs: the memory consumption of a program at rest is near zero when stateless and the sum of all state when stateful. 
Target devices that natively support stream-ordered allocations (like CUDA) can even share pools across processes.</p> <p>The other provided feature in combination with the fence guaranteed forward progress is that so long as the memory pool can service a single request execution can still continue even when constrained. A device can serialize two independent invocations requiring 400MB of transient memory when the system only has 512MB available with no user-visible impact besides increased latency. This does require the user to ensure they schedule work that is possible to run or rely on the target system having paging in order to lighten the strictness of the pool quotas.</p> <p>Stream-ordered allocations performed by the user for invocation inputs can be declared as transferred to the program. This allows the program to eagerly deallocate or reuse the input storage while still preserving the internal scheduling requirements of the program.</p>"},{"location":"developers/design-docs/invocation-execution-model/#internal-state","title":"Internal State","text":"<p>A stateful program may contain internal timelines that it uses to order its own execution. Take for example this simple stateful program:</p> <pre><code>class TrivialKernel(Program):\n  _x0 = Program.export_global(x_type)\n  def get(self):\n    return self._x0\n  def set(self, x=x_type):\n    self._x0 = x\n  def matmul(self, x=y_type):\n    self._x0 = self._matmul(x, self._x0)\n  @Program.kernel\n  def _matmul(x, x0):\n    return jnp.matmul(x, x0)\n</code></pre> <p>Each invocation of <code>matmul</code> needs to be executed in-order with prior invocations as there is a data dependency established on <code>self._x0</code>. Attempts to <code>get</code> or <code>set</code> must also be sequenced correctly with the <code>matmul</code> invocations. 
A basic usage like this:</p> <pre><code>m = TrivialKernel()\nm.set(input)\nm.matmul(a)\nm.matmul(b)\nm.matmul(c)\noutput = m.get()\nprint(output)  # implicit wait\n</code></pre> <p>Would be executed as:</p> <pre><code>sequenceDiagram\n    activate User\n    User-&gt;&gt;TrivialKernel: @set(input)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    activate Device\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @matmul(a)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @matmul(b)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @matmul(c)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @get()\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    Device--&gt;&gt;Device: ;\n    deactivate User\n    User-&gt;&gt;User: (wait)\n    Device--&gt;&gt;User: (signal)\n    deactivate Device\n    activate User\n    User-&gt;&gt;User: print(output)\n    deactivate User</code></pre> <p>Note that although the user provided no timeline of their own execution is still ordered correctly due to the internal timeline constructed by the program. 
If the user wanted to also pipeline execution with another program they could do so by providing their own fences.</p>"},{"location":"developers/design-docs/metal-hal-driver/","title":"Metal HAL driver","text":"<p>This document lists technical details regarding the Metal implementation of IREE's Hardware Abstraction Layer, called a Metal HAL driver.</p> <p>IREE provides a Hardware Abstraction Layer (HAL) as a common interface to different compute accelerators. IREE HAL's design draws inspiration from modern GPU architecture and APIs; so implementing a HAL driver using modern GPU APIs is generally straightforward. This applies to the Metal HAL driver.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#overall-design-choices","title":"Overall Design Choices","text":"","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#metal-versions","title":"Metal Versions","text":"<p>Currently the Metal HAL driver expects Metal 3 capabilities. Metal 3 was released in late 2022 and has been supported since macOS Ventura and iOS 16. It covers recent Apple silicon GPUs including A13+ and M1+ chips and others.</p> <p>In the future, we expect to increase the support to cover Metal 2 capabilities. Metal 2 introduces useful features like argument buffer and others that are necessary for performance and make IREE HAL implementation simpler. Metal 2 was released in late 2017 and has been supported since macOS High Sierra and iOS 11. It is already dominant (macOS, iOS).</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#programming-languages-and-libraries","title":"Programming Languages and Libraries","text":"<p>The Metal framework only exposes Objective-C or Swift programming language APIs. Metal HAL driver needs to inherit from common HAL abstraction definitions, which are in C. To minimize dependency and binary size and increase performance, we use Metal's Objective-C API for implementing the Metal HAL driver. 
Header (<code>.h</code>) and implementation (<code>.m</code>) files are put adjacent to each other.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#object-lifetime-management","title":"Object Lifetime Management","text":"<p>Objective-C uses refcount for tracking object lifetime and managing memory. This is traditionally done manually by sending <code>retain</code> and <code>release</code> messages to Objective-C objects. Modern Objective-C allows developers to opt in to use Automatic Reference Counting to let the compiler automatically deduce and insert <code>retain</code>/<code>release</code> where possible to simplify the burden of manual management.</p> <p>We don't use ARC in the Metal HAL driver given that IREE has its own object refcount and lifetime management mechanism. Metal HAL GPU objects are tracked with that to be consistent with others. Each Metal HAL GPU object <code>retain</code>s the underlying Metal <code>id&lt;MTL*&gt;</code> object on construction and <code>release</code>s on destruction.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#gpu-objects","title":"GPU Objects","text":"<p>Metal is one of the main modern GPU APIs that provide more explicit control over the hardware. 
The mapping between IREE HAL classes and Metal protocols are relatively straightforward:</p> IREE HAL Class Metal Protocol <code>iree_hal_driver_t</code> N/A <code>iree_hal_device_t</code> <code>MTLDevice</code> <code>iree_hal_command_buffer_t</code> <code>MTLCommandBuffer</code> <code>iree_hal_semaphore_t</code> <code>MTLSharedEvent</code> <code>iree_hal_allocator_t</code> N/A <code>iree_hal_buffer_t</code> <code>MTLBuffer</code> <code>iree_hal_executable_t</code> <code>MTLLibrary</code> <code>iree_hal_executable_cache_t</code> N/A <code>iree_hal_descriptor_set_layout_t</code> N/A <code>iree_hal_pipeline_layout_t</code> N/A <p>In the following subsections, we go over each pair to provide more details.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#driver","title":"Driver","text":"<p>There is no native driver abstraction in Metal. IREE's Metal HAL driver still provides a <code>iree_hal_metal_driver_t</code> struct to implement the common <code>iree_hal_driver_t</code> struct. <code>iree_hal_metal_driver_t</code> just <code>retain</code>s all available Metal devices in the system during its lifetime, to guarantee that we have the same <code>id&lt;MTLDevice&gt;</code> for device querying and creation.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#device","title":"Device","text":"<p><code>iree_hal_metal_device_t</code> implements <code>iree_hal_device_t</code> to provide the interface to Metal GPU device by wrapping a <code>id&lt;MTLDevice&gt;</code>. Upon construction, <code>iree_hal_metal_device_t</code> creates and retains one queue for both dispatch and transfer during its lifetime. 
In the future we expect to support multiple queues for better concurrency.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#command-buffer-submission","title":"Command buffer submission","text":"<p>In IREE HAL, command buffers are directly created from the <code>iree_hal_device_t</code>. They are also directly submitted there via <code>iree_hal_device_queue_execute()</code>. Each execution takes a batch of command buffers, together with a list of waiting <code>iree_hal_semaphore_t</code>s and a list of signaling <code>iree_hal_semaphore_t</code>s. There is no direct mapping of such structure in Metal; so we perform the submission in three steps:</p> <ol> <li>Create a new <code>MTLCommandBuffer</code> to <code>encodeWaitForEvent:value</code> for all    waiting <code>iree_hal_semaphore_t</code>s and commit this command buffer.</li> <li>Commit all command buffers in the submission batch.</li> <li>Create a new <code>MTLCommandBuffer</code> to <code>encodeSignalEvent:value</code> for all    signaling <code>iree_hal_semaphore_t</code>s and commit this command buffer.</li> </ol> <p>Such submission enables asynchronous execution of the workload on the GPU.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#queue-ordered-allocation","title":"Queue-ordered allocation","text":"<p>Queue-ordered asynchronous allocation via <code>iree_hal_device_queue_alloc</code> is not fully supported yet; it just translates to blocking wait and allocation.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#collectives","title":"Collectives","text":"<p>Collectives support is not yet implemented.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#profiling","title":"Profiling","text":"<p>The Metal HAL driver supports profiling via <code>MTLCaptureManager</code>. 
We can either capture to a trace file or XCode.</p> <p>To perform profiling in the command line, attach <code>--device_profiling_mode=queue --device_profiling_file=/path/to/metal.gputrace</code> to IREE binaries.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#command-buffer","title":"Command buffer","text":"<p>Command buffers are where IREE HAL and Metal API have a major difference.</p> <p>IREE HAL command buffers follow the flat Vulkan recording model, where all memory or dispatch commands are recorded into a command buffer directly. Unlike Vulkan, Metal adopts a multi-level command recording model--memory/dispatch commands are not directly recorded into a command buffer; rather, they must go through the additional level of blit/compute encoders. Implementing IREE's HAL using Metal would require switching encoders for interleaved memory and dispatch commands. Additionally, certain IREE HAL API features do not have direct mapping in Metal APIs, e.g., various forms of IREE HAL execution/memory barriers. Translating them would require looking at both previous and next commands to decide the proper mapping.</p> <p>Due to these reasons, it's beneficial to have a complete view of the full command buffer and extra flexibility during recording, in order to fixup past commands, or inspect future commands.</p> <p>Therefore, to implement IREE HAL command buffers using Metal, we perform two steps using a linked list of command segments: First we create segments to keep track of all IREE HAL commands and the associated data. And then, when finalizing the command buffer, we iterate through all the segments and record their contents into a proper <code>MTLCommandBuffer</code>. 
A linked list gives us the flexibility to organize the command sequence with low overhead; and a deferred recording gives us the complete picture of the command buffer when recording actually starts.</p> <p>The Metal HAL driver right now only supports one-shot command buffers, by mapping to <code>MTLCommandBuffer</code>s.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#fillcopyupdate-buffer","title":"Fill/copy/update buffer","text":"<p>Metal APIs for fill and copy buffers have alignment restrictions on the offset and length. <code>iree_hal_command_buffer_{fill|copy|update}_buffer()</code> is more flexible regarding that. So for cases that aren't directly supported by Metal APIs, we use polyfill compute kernels to perform the memory operation using GPU threads.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#semaphore","title":"Semaphore","text":"<p><code>iree_hal_semaphore_t</code> allows host-&gt;device, device-&gt;host, host-&gt;host, and device-&gt;device synchronization. It maps to Vulkan timeline semaphore. In Metal world, the counterpart would be <code>MTLSharedEvent</code>. Most of the <code>iree_hal_semaphore_t</code> APIs are simple to implement in <code>MTLSharedEvent</code>, with <code>iree_hal_semaphore_wait()</code> as an exception. A listener is registered on the <code>MTLSharedEvent</code> with <code>notifyListener:atValue:block:</code> to signal a semaphore to wake the current thread, which is put into sleep by waiting on the semaphore.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#allocator","title":"Allocator","text":"<p>At the moment the Metal HAL driver just has a very simple <code>iree_hal_allocator_t</code> implementation. It just wraps a <code>MTLDevice</code> and redirects all allocation requests to the <code>MTLDevice</code>. No page/pool/slab or whatever. 
This is meant to be used together with common allocator layers like the caching allocator.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#buffer","title":"Buffer","text":"<p>IREE <code>iree_hal_buffer_t</code> maps to Metal <code>MTLBuffer</code>. See Object Lifetime Management for more details.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#executable","title":"Executable","text":"<p>IREE <code>iree_hal_executable_t</code> represents a GPU program archive with a driver-defined format. It maps naturally to Metal <code>MTLLibrary</code>. An entry point in a <code>MTLLibrary</code> is a <code>MTLFunction</code>. We define <code>iree_hal_metal_kernel_params_t</code> to wrap around a <code>MTLLibrary</code>, its <code>MTLFunction</code>s, and also <code>MTLComputePipelineState</code> objects constructed from <code>MTLFunction</code>s.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#executable-cache","title":"Executable cache","text":"<p>IREE <code>iree_hal_executable_cache_t</code> models a cache of prepared GPU executables for a particular device. At the moment the Metal HAL driver does not perform any caching of GPU programs; it simply reads the program from the FlatBuffer and hands it over to the Metal driver.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#descriptor-set-pipeline-layout","title":"Descriptor set / pipeline layout","text":"<p>See Resource descriptors for more details.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#compute-pipeline","title":"Compute Pipeline","text":"","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#shaderkernel-compilation","title":"Shader/kernel compilation","text":"<p>Metal has Metal Shading Language (MSL) for authoring graphics shaders and compute kernels. 
MSL source code can be directly consumed by the Metal framework at run-time; it can also be compiled first into an opaque library using command-line tools at build-time.</p> <p>IREE uses compilers to compile ML models expressed with high-level op semantics down to GPU native source format. This is also the case for the Metal HAL driver. Metal does not provide an open intermediate language; we reuse the SPIR-V code generation pipeline and then cross compile the generated SPIR-V into MSL source with SPIRV-Cross. This is actually a fairly common practice for targeting multiple GPU APIs in the graphics programming world. For example, the Vulkan implementation in macOS/iOS, MoltenVK, is also doing the same for shaders/kernels. The path is quite robust, as demonstrated by various games on top of MoltenVK.</p> <p>Therefore, in IREE, we have a <code>MetalSPIRVTargetBackend</code>, which pulls in the common SPIR-V passes to form the compilation pipeline. The difference would be to provide a suitable SPIR-V target environment to drive the compilation, which one can derive from the Metal GPU families to target. The serialization step differs from <code>VulkanSPIRVTargetBackend</code> too: following the normal SPIR-V serialization step, we additionally need to invoke SPIRV-Cross to cross compile the generated SPIR-V into MSL, and then compile and/or serialize the MSL source/library.</p> <p>IREE uses FlatBuffer to encode the whole workload module, including both GPU shader/kernel (called executable in IREE terminology) and CPU scheduling logic. The GPU executables are embedded as part of the module's FlatBuffer, which are <code>mmap</code>ped when IREE runs.</p> <p>For the Metal HAL driver, it means we need to embed the MSL kernels inside the module FlatBuffer. 
Right now we can either encode the MSL source strings and compile them at Metal run-time, or directly encode the library instead.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#workgroupthreadgroup-size","title":"Workgroup/threadgroup size","text":"<p>When dispatching a compute kernel in Metal, we need to specify the number of thread groups in the grid and the number of threads in each thread group. Both are 3-D vectors. IREE HAL, which follows Vulkan, calls them workgroup count and workgroup size, respectively.</p> <p>In the Vulkan programming model, workgroup count and workgroup size are specified at different places: the former is given when invoking <code>vkCmdDispatch()</code>, while the latter is encoded in the dispatched SPIR-V code. This split does not match the Metal model, where we specify both in the API with <code>dispatchThreads:threadsPerThreadgroup:</code>.</p> <p>As said in shader/kernel compilation, MSL kernels are cross compiled from SPIR-V code and then embedded in the module FlatBuffer. The module FlatBuffer provides us a way to convey the threadgroup/workgroup size information extracted from the SPIR-V code. We encode an additional 3-D vector for each entry point and use it as the threadgroup size when later dispatching the <code>MTLFunction</code> corresponding to the entry point.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#resource-descriptors","title":"Resource descriptors","text":"<p>A descriptor is an opaque handle pointing to a resource that is accessed in the compute kernel. IREE's HAL models several concepts related to GPU resource management explicitly:</p> <ul> <li><code>iree_hal_descriptor_set_layout_t</code>: a schema for   describing an array of descriptor bindings. 
Each descriptor binding specifies   the resource type, access mode and other information.</li> <li><code>iree_hal_pipeline_layout_t</code>: a schema for describing all   the resources accessed by a compute pipeline. It includes zero or more   <code>DescriptorSetLayout</code>s and (optional) push constants.</li> </ul> <p>However, this isn't totally matching Metal's paradigm. In the Metal framework, the closest concept to descriptor sets would be argument buffer. There is no direct correspondence to descriptor set layout and pipeline layout. Rather, the layout is implicitly encoded in Metal shaders as MSL structs. The APIs for creating argument buffers do not encourage early creation without pipelines: one typically creates them for each <code>MTLFunction</code>.</p> <p>All of this means it's better to defer the creation of the argument buffer until the point of compute pipeline creation and dispatch. Therefore, the Metal HAL driver's <code>iree_hal_metal_descriptor_set_layout_t</code> and <code>iree_hal_metal_pipeline_layout_t</code> are just containers holding the information up for recording command buffer dispatch.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#command-buffer-dispatch","title":"Command buffer dispatch","text":"<p>Metal HAL driver command buffer dispatch recording performs the following steps with the current active <code>MTLComputeCommandEncoder</code>:</p> <ol> <li>Bind the <code>MTLComputePipelineState</code> for the current entry function.</li> <li>Encode the push constants using <code>setBytes:length:atIndex</code>.</li> <li>For each bound descriptor set at set #<code>S</code>:</li> <li>Create a <code>MTLArgumentEncoder</code> for encoding an       associated argument <code>MTLBuffer</code>.</li> <li>For each bound resource buffer at binding #<code>B</code> in this descriptor set,       encode it to the argument buffer index #<code>B</code> with       <code>setBuffer::offset::atIndex:</code> and inform the 
<code>MTLComputeCommandEncoder</code>       that the dispatch will use this resource with <code>useResource:usage:</code>.</li> <li>Set the argument <code>MTLBuffer</code> to buffer index #<code>S</code>.</li> <li>Dispatch with <code>dispatchThreadgroups:threadsPerThreadgroup:</code>.</li> </ol>","tags":["GPU","Metal"]},{"location":"developers/general/contributing/","title":"Contributing to IREE","text":"<p>We'd love to accept your patches and contributions to this project.</p> <p>Note - coordinating efforts</p> <p>Please file issues or reach out on any of our other communication channels before doing substantial work; this will ensure that others don't duplicate the work and that there's a chance to discuss any design issues.</p>"},{"location":"developers/general/contributing/#developer-policies","title":"Developer policies","text":""},{"location":"developers/general/contributing/#code-of-conduct","title":"Code of conduct","text":"<p>This project follows the OpenXLA Code of Conduct.</p>"},{"location":"developers/general/contributing/#contributor-license-agreement","title":"Contributor License Agreement","text":"<p>Contributions to this project must be accompanied by a Contributor License Agreement (CLA). 
Head over to https://cla.developers.google.com/ to see your current agreements on file or to sign a new one.</p> <ul> <li>You (or your employer) retain the copyright to your contribution; this simply   gives us permission to use and redistribute your contributions as part of the   project.</li> <li>You generally only need to submit a CLA once, so if you've already submitted   one (even if it was for a different project), you probably don't need to do it   again.</li> </ul>"},{"location":"developers/general/contributing/#coding-style-guidelines","title":"Coding style guidelines","text":"<p>Most of the code style is derived from the Google Style Guides for the appropriate language and is generally not something we accept changes on (as clang-format and other linters set that for us). The C++ compiler portion of the project follows the MLIR/LLVM style guide.</p> <p>Improvements to code structure and clarity are welcome but please file issues to track such work first. Pure style changes are unlikely to be accepted unless they are applied consistently across the project.</p> Tip - code formatters and lint scripts <p>Formatters like <code>clang-format</code> (C/C++) and Black (Python) can be set to run automatically in your editor of choice.</p> <p>The script at <code>build_tools/scripts/lint.sh</code> can also be used to run the full suite of lint checks.</p>"},{"location":"developers/general/contributing/#code-reviews","title":"Code reviews","text":"<p>All submissions, including submissions by maintainers, require review. We use GitHub pull requests (PRs) for this purpose. Consult GitHub Help for more information on using pull requests.</p> <ul> <li>Please keep PRs small (focused on a single issue) to make reviews and later   culprit-finding easier.</li> <li>You may see trusted core contributors bending this rule for project   maintenance and major subsystem renovation. 
If you feel like the rules aren't   working for a certain situation, please ask as we bias towards pragmatism for   cases that require it.</li> </ul>"},{"location":"developers/general/contributing/#testing-policy","title":"Testing policy","text":"<p>With few exceptions, features should be accompanied by automated tests.</p> <p>We use a mix of in-tree and out-of-tree unit and integration tests. For more information about the types of tests used across the project, refer to the testing guide.</p>"},{"location":"developers/general/contributing/#github-actions-workflows","title":"GitHub Actions workflows","text":"<p>We use GitHub Actions to automatically build and test various parts of the project.</p> <ul> <li>Most presubmit workflows will only run automatically on PRs if you are a   project collaborator. Otherwise a maintainer must   approve workflow runs.   If you are sending code changes to the project, please ask to be added as a   collaborator, so that these can run automatically.</li> <li>It is generally expected that PRs will only be merged when all checks are   passing. In some cases, pre-existing failures may be bypassed by a maintainer.</li> </ul> Tip - adjusting workflow behavior <p>Some workflows only run on commits after they are merged. See the CI behavior manipulation section below to learn how to customize this behavior.</p>"},{"location":"developers/general/contributing/#merging-approved-changes","title":"Merging approved changes","text":"<p>After review and presubmit checks, PRs should typically be merged using \"squash and merge\".</p> <ul> <li>The squashed commit summary should match the PR title and the commit   description should match the PR body (this is the default behavior).   Accordingly, please write these as you would a helpful commit message.</li> </ul> <p>It is assumed that the PR author will merge their change unless they ask someone else to merge it for them (e.g. 
because they don't have write access yet).</p>"},{"location":"developers/general/contributing/#obtaining-commit-access","title":"Obtaining commit access","text":"<p>Access to affiliated repositories is divided into tiers:</p> Tier Description Team link Triage New project members should typically start here Can be assigned issues Can apply labels to issues / PRs Can run workflows without approval iree-triage Write Established project contributors should request this access Can merge approved pull requests Can create branches iree-write Maintain/Admin  Can edit repository settings Can push to protected branches Added case-by-case <p>All access tiers first require joining the iree-org GitHub organization.</p> <p>Fill out this form to request access </p> <p>Once you are a member of the iree-org GitHub organization, you can request to join any of the teams on https://github.com/orgs/iree-org/teams.</p> <p>Note: other GitHub organizations</p> <p>Work on IREE sometimes spans other GitHub organizations like shark-infra. 
Reach out to a project member if you would also like access to repositories in those organizations.</p>"},{"location":"developers/general/contributing/#credits-in-the-authors-file","title":"Credits in the AUTHORS file","text":"<p>If you would like additional recognition for your contributions, you may add yourself or your organization to the AUTHORS file that keeps track of those who have made significant contributions to the project.</p> <ul> <li>Please add the entity who owns the copyright for your contribution.</li> <li>The source control history remains the most accurate source for individual   contributions.</li> </ul>"},{"location":"developers/general/contributing/#tips-for-contributors","title":"Tips for contributors","text":""},{"location":"developers/general/contributing/#tool-recommendations","title":"Tool recommendations","text":"Program or tool Description  Visual Studio Code (VSCode) The most commonly used editor amongst IREE developers  Ccache A fast C/C++ compiler cache. See the CMake with <code>ccache</code> page  GitHub CLI A CLI for interacting with GitHub  \"Refined GitHub\" browser extensions Extensions that add features to the GitHub UI"},{"location":"developers/general/contributing/#build-systems","title":"Build systems","text":"<p>IREE supports building from source with both Bazel and CMake.</p> <ul> <li>CMake is the preferred build system and offers the most flexible   configuration options</li> <li>Bazel is a stricter build system and helps with usage in Google's downstream   source repository</li> <li>Certain dependencies (think large/complex projects like CUDA, TensorFlow,   PyTorch, etc.) may be difficult to support with one build system or the   other, so the project may configure these as optional</li> </ul>"},{"location":"developers/general/contributing/#continuous-integration-ci","title":"Continuous integration (CI)","text":"<p>IREE uses GitHub Actions for CI. 
The primary CI is configured in the ci.yml workflow file.</p>"},{"location":"developers/general/contributing/#self-hosted-runners","title":"Self-hosted runners","text":"<p>In addition to the default runners GitHub provides, IREE uses self-hosted runners to run many of its workflow jobs. These enable access to additional compute and custom configurations such as accelerators. Configuration scripting is checked in to this repository (see the README for that directory).</p>"},{"location":"developers/general/contributing/#custom-managed-runners","title":"Custom managed runners","text":"<p>In addition to our self-hosted runners, we use GitHub's large managed runners for some platforms.</p>"},{"location":"developers/general/contributing/#ci-behavior-manipulation","title":"CI behavior manipulation","text":"<p>The setup step of the CI determines which CI jobs to run. This is controlled by the configure_ci.py script. It will generally run a pre-determined set of jobs on presubmit with some jobs kept as post-submit only. If changes are only to a certain set of excluded files that we know don't affect CI (e.g. Markdown files), then it will skip the jobs.</p> <p>You can customize which jobs run using git trailers in the PR description.</p> <p>The available options are</p> <pre><code>ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\nskip-ci: free form reason\nskip-llvm-integrate-benchmark: free form reason\nbenchmark-extra: extra,benchmarks,to,run\nrunner-env: [testing|prod]\n</code></pre> Using <code>skip-ci</code> <p><code>skip-ci</code> skips all jobs. It is mutually exclusive with the other <code>ci-*</code> options and is synonymous with <code>ci-skip: all</code>.</p> <pre><code>skip-ci: free form reason\n</code></pre> Using <code>ci-skip</code>, <code>ci-extra</code>, <code>ci-exactly</code> <p>The <code>ci-*</code> options instruct the setup script on which jobs to include or exclude from its run. 
They take a comma-separated list of jobs which must be from the set of top-level job identifiers in the <code>ci.yml</code> file or the special keyword \"all\" to indicate all jobs.</p> <pre><code>ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\n</code></pre> <ul> <li><code>ci-skip</code> removes jobs that would otherwise be included, though it is not an error to list jobs that would not be included by default.</li> <li><code>ci-extra</code> adds additional jobs that would not have otherwise been run, though it is not an error to list jobs that would have been included anyway. It is an error to list a job in both \"skip\" and \"extra\".</li> <li><code>ci-exactly</code> provides an exact list of jobs that should run. It is mutually exclusive with both \"skip\" and \"extra\".</li> </ul> <p>In all these cases, the setup does not make any effort to ensure that job dependencies are satisfied. Thus, if you request skipping the <code>build_all</code> job, all the jobs that depend on it will fail, not be skipped.</p> Using <code>benchmark-extra</code>, <code>skip-llvm-integrate-benchmark</code> <pre><code>benchmark-extra: extra,benchmarks,to,run\nskip-llvm-integrate-benchmark: free form reason\n</code></pre> <p>Benchmarks don't run by default on PRs, and must be specifically requested.</p> <p>The <code>benchmark-extra</code> option allows specifying additional benchmark presets to run as part of benchmarking. It accepts a comma-separated list of benchmark presets. This combines with labels added to the PR (which are a more limited set of options). See the benchmark suites documentation.</p> <p>Benchmarks do run by default on PRs detected to be an integration of LLVM into IREE, but this behavior can be disabled with <code>skip-llvm-integrate-benchmark</code>.</p> Using <code>runner-env</code> <p>The <code>runner-env</code> option controls which runner environment to target for our self-hosted runners. 
We maintain a test environment to allow testing out new configurations prior to rolling them out. This trailer is for advanced users who are working on the CI infrastructure itself.</p> <pre><code>runner-env: [testing|prod]\n</code></pre>"},{"location":"developers/general/contributing/#ci-configuration-recipes","title":"CI configuration recipes","text":"<p>Copy/paste any of these at the bottom of a PR description to change what the CI runs.</p> <ul> <li> <p>Also run Windows and macOS builds that are normally post-merge only:</p> <pre><code>ci-extra: build_test_all_windows,build_test_all_macos_arm64,build_test_all_macos_x86_64\n</code></pre> </li> <li> <p>Also run GPU tests on NVIDIA A100 runners (opt-in due to low availability):</p> <pre><code>ci-extra: test_nvidia_a100\n</code></pre> </li> <li> <p>Skip all CI builds and tests, e.g. for comment-only changes:</p> <pre><code>skip-ci: Comment-only change.\n</code></pre> </li> <li> <p>Only run Bazel builds, e.g. for changes only affecting Bazel rules:</p> <pre><code>ci-exactly: build_test_all_bazel\n</code></pre> </li> </ul> <p>For example, this PR opted in to running the <code>build_test_all_windows</code> job:</p> <p></p> <p>The enabled jobs can be viewed from the Summary page of an action run:</p> <p></p>"},{"location":"developers/general/contributing/#git-workflows","title":"Git workflows","text":"<p>We tend to use the \"triangular\" or \"forking\" workflow. Develop primarily on a clone of the repository on your development machine. Any local branches named the same as persistent branches from the main repository are pristine (though potentially stale) copies. You only fastforward these to match upstream and otherwise do development on other branches. 
When sending PRs, you push to a different branch on your public fork and create the PR from there.</p>"},{"location":"developers/general/contributing/#setup","title":"Setup","text":"<ol> <li> <p>Create a fork of the main repository.</p> </li> <li> <p>Create a local git repository with remotes <code>upstream</code> (the main repository)     and <code>origin</code> (your personal fork). To list your current remotes     <code>git remote -v</code>.</p> <p>a. If you already cloned from the main repository (e.g. by following the getting started guide):</p> <pre><code># From your existing git repo\n$ git remote rename origin upstream\n$ git remote add origin https://github.com/&lt;github_username&gt;/iree.git\n</code></pre> <p>b. If you haven't already cloned:</p> <pre><code># From whatever directory under which you want to nest your repo\n$ git clone https://github.com/&lt;github_username&gt;/iree.git\n$ cd iree\n$ git remote add upstream https://github.com/iree-org/iree.git\n</code></pre> <p>This is especially important for maintainers who have write access (so can push directly to the main repository) and admins who have elevated privileges (so can push directly to protected branches).</p> <p>These names are just suggestions, but you might find some scripts where the defaults are for remotes named like this.</p> <p>For extra safety, you can make it difficult to push directly to upstream by setting the push url to something invalid: <code>git remote set-url --push upstream DISABLE</code>, which requires re-enabling the push URL explicitly before pushing. You can wrap this behavior in a custom git command like git-sudo.</p> </li> <li> <p>Use a script like     git_update.sh     to easily synchronize <code>main</code> with <code>upstream</code>. Submodules make this a     little trickier than it should be. 
You can also turn this into a git command     by adding it to your path as <code>git-update</code>.</p> </li> </ol>"},{"location":"developers/general/contributing/#git-config","title":"Git config","text":"<p>These are some additional options you could put in your top-level <code>.gitconfig</code> or repository-specific <code>.git/config</code> files that are conducive to the recommended workflow</p> <pre><code>[push]\n  default = current\n[alias]\n  # Delete branches that you pushed and have been deleted upstream, e.g. because\n  # the PR was merged.\n  gone = ! \"git fetch -p  &amp;&amp; git for-each-ref --format '%(refname:short) %(upstream:track)' | awk '$2 == \\\"[gone]\\\" {print $1}' | xargs -r git branch -D\"\n  # Update from upstream (custom command) and delete obsolete local branches.\n  sync = ! (git update main &amp;&amp; git gone)\n  # Create a new branch based off of main (requires a clean working directory).\n  new = \"!f(){ \\\\\\ngit checkout main &amp;&amp; git switch -c $1; \\\\\\n}; f\"\n  # Display branches in a useful \"latest last\" format\n  br = for-each-ref --sort=committerdate refs/heads/ --format='%(HEAD) %(color:yellow)%(refname:short)%(color:reset) - %(color:red)%(objectname:short)%(color:reset) - %(contents:subject) (%(color:green)%(committerdate:relative)%(color:reset))'\n  # `git git foo` -&gt; `git foo` typo fixer\n  git = \"!f(){ \\\\\\n git \\\"$@\\\"; \\\\\\n}; f\"\n  # Get the git root directory\n  root = rev-parse --show-toplevel\n  # checkout, but also sync submodules\n  ch = \"!f() { \\\\\\n git checkout \\\"$@\\\"; git submodule sync &amp;&amp; git submodule update --init; \\\\\\n}; f\"\n  # See the diff for a PR branch vs the main branch\n  diffmain = diff --merge-base main\n  # See only the files that differ vs the main branch\n  whatsout = diffmain --name-only\n[checkout]\n  # If the checkout command\n  defaultRemote = origin\n[pull]\n  # When pulling, only complete the pull if it's a clean fast forward.\n  ff = 
only\n[remote]\n  # Push to your fork (origin) by default\n  pushDefault = origin\n[url \"ssh://git@github.com/\"]\n  # Pull with https (so no auth required), but push with ssh.\n  pushInsteadOf = https://github.com/\n</code></pre>"},{"location":"developers/general/developer-overview/","title":"Developer overview","text":"<p>This guide provides an overview of IREE's project structure and main tools for developers.</p>"},{"location":"developers/general/developer-overview/#project-code-layout","title":"Project code layout","text":"<ul> <li>/compiler/:   MLIR dialects, LLVM compiler passes, module translation code, etc.<ul> <li>bindings/: Python and other language bindings</li> </ul> </li> <li>/runtime/:   Standalone runtime code including the VM and HAL drivers<ul> <li>bindings/: Python and other language bindings</li> </ul> </li> <li>/integrations/:   Integrations between IREE and other frameworks, such as TensorFlow</li> <li>/tests/:   Tests for full compiler-&gt;runtime workflows</li> <li>/tools/:   Developer tools (<code>iree-compile</code>, <code>iree-run-module</code>, etc.)</li> <li>/samples/: Also see the   separate https://github.com/iree-org/iree-experimental repository</li> </ul>"},{"location":"developers/general/developer-overview/#iree-compiler-code-layout","title":"IREE compiler code layout","text":"<ul> <li>API/:   Public C API</li> <li>Codegen/:   Code generation for compute kernels</li> <li>Dialect/:   MLIR dialects (<code>Flow</code>, <code>HAL</code>, <code>Stream</code>, <code>VM</code>, etc.)</li> <li>InputConversion/:   Conversions from input dialects and preprocessing</li> </ul>"},{"location":"developers/general/developer-overview/#iree-runtime-code-layout","title":"IREE runtime code layout","text":"<ul> <li>base/:   Common types and utilities used throughout the runtime</li> <li>hal/:   Hardware Abstraction Layer for IREE's runtime, with   implementations for hardware and software backends</li> <li>schemas/:   Data storage format definitions, 
primarily using   FlatBuffers</li> <li>task/:   System for running tasks across multiple CPU threads</li> <li>tooling/:   Utilities for tests and developer tools, not suitable for use as-is in   downstream applications</li> <li>vm/:   Bytecode Virtual Machine used to work with IREE modules and invoke   IREE functions</li> </ul>"},{"location":"developers/general/developer-overview/#developer-tools","title":"Developer tools","text":"<p>IREE's core compiler accepts programs in supported input MLIR dialects (e.g. <code>stablehlo</code>, <code>tosa</code>, <code>linalg</code>). Import tools and APIs may be used to convert from framework-specific formats like TensorFlow SavedModel to MLIR modules. While programs are ultimately compiled down to modules suitable for running on some combination of IREE's target deployment platforms, IREE's developer tools can run individual compiler passes, translations, and other transformations step by step.</p>"},{"location":"developers/general/developer-overview/#iree-opt","title":"iree-opt","text":"<p><code>iree-opt</code> is a tool for testing IREE's compiler passes. It is similar to mlir-opt and runs sets of IREE's compiler passes on <code>.mlir</code> input files. See \"conversion\" in MLIR's Glossary for more information. 
Transformations performed by <code>iree-opt</code> can range from individual passes performing isolated manipulations to broad pipelines that encompass a sequence of steps.</p> <p>Test <code>.mlir</code> files that are checked in typically include a <code>RUN</code> block at the top of the file that specifies which passes should be performed and if <code>FileCheck</code> should be used to test the generated output.</p> <p>Here's an example of a small compiler pass running on a test file:</p> <pre><code>$ ../iree-build/tools/iree-opt \\\n  --split-input-file \\\n  --mlir-print-ir-before-all \\\n  --iree-util-drop-compiler-hints \\\n  $PWD/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir\n</code></pre> <p>For a more complex example, here's how to run IREE's complete transformation pipeline targeting the VMVX backend on the fullyconnected.mlir model file:</p> <pre><code>$ ../iree-build/tools/iree-opt \\\n  --iree-transformation-pipeline \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/tests/e2e/stablehlo_models/fullyconnected.mlir\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-compile","title":"iree-compile","text":"<p><code>iree-compile</code> is IREE's main compiler driver for generating binaries from supported input MLIR assembly.</p> <p>For example, to translate <code>simple_abs.mlir</code> to an IREE module:</p> <pre><code>$ ../iree-build/tools/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-run-module","title":"iree-run-module","text":"<p>The <code>iree-run-module</code> program takes an already translated IREE module as input and executes an exported function using the provided inputs.</p> <p>This program can be used in sequence with <code>iree-compile</code> to translate a <code>.mlir</code> file to an IREE module and then execute it. 
Here is an example command that executes the simple <code>simple_abs_vmvx.vmfb</code> compiled from <code>simple_abs.mlir</code> above on IREE's local-task CPU device:</p> <pre><code>$ ../iree-build/tools/iree-run-module \\\n  --module=/tmp/simple_abs_vmvx.vmfb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=f32=-2\n</code></pre> <p>Input scalars are passed as <code>value</code> and input buffers are passed as <code>[shape]xtype=[value]</code>.</p> <ul> <li>Input buffers may also be read from raw binary files or Numpy npy files.</li> </ul> MLIR type Description Input example <code>i32</code> Scalar <code>--input=1234</code> <code>tensor&lt;i32&gt;</code> 0-D tensor <code>--input=i32=1234</code> <code>tensor&lt;1xi32&gt;</code> 1-D tensor (shape [1]) <code>--input=1xi32=1234</code> <code>tensor&lt;2xi32&gt;</code> 1-D tensor (shape [2]) <code>--input=\"2xi32=12 34\"</code> <code>tensor&lt;2x3xi32&gt;</code> 2-D tensor (shape [2, 3]) <code>--input=\"2x3xi32=[1 2 3][4 5 6]\"</code> Other usage examples <p>See these test files for advanced usage examples:</p> Basic testsInputsOutputsExpected <p>Source file: <code>tools/test/iree-run-module.mlir</code></p> tools/test/iree-run-module.mlir<pre><code>// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=\"2xf32=-2 3\") | FileCheck %s\n// RUN: (iree-compile --iree-hal-target-backends=llvm-cpu %s | iree-run-module --device=local-task --module=- --function=abs --input=\"2xf32=-2 3\") | FileCheck %s\n\n// CHECK-LABEL: EXEC @abs\nfunc.func @abs(%input : tensor&lt;2xf32&gt;) -&gt; (tensor&lt;2xf32&gt;) {\n  %result = math.absf %input : tensor&lt;2xf32&gt;\n  return %result : tensor&lt;2xf32&gt;\n}\n  // INPUT-BUFFERS: result[1]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: 2xf32=-2.0 3.0\n</code></pre> <p>Source file: <code>tools/test/iree-run-module-inputs.mlir</code></p> tools/test/iree-run-module-inputs.mlir<pre><code>// Passing no inputs is 
okay.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=no_input) | \\\n// RUN: FileCheck --check-prefix=NO-INPUT %s\n// NO-INPUT-LABEL: EXEC @no_input\nfunc.func @no_input() {\n  return\n}\n\n// -----\n\n// Scalars use the form `--input=value`. Type (float/int) should be omitted.\n//   * The VM does not use i1/i8 types, so i32 VM types are returned instead.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=- \\\n// RUN:                  --function=scalars \\\n// RUN:                  --input=1 \\\n// RUN:                  --input=5 \\\n// RUN:                  --input=1234 \\\n// RUN:                  --input=-3.14) | \\\n// RUN: FileCheck --check-prefix=INPUT-SCALARS %s\n// INPUT-SCALARS-LABEL: EXEC @scalars\nfunc.func @scalars(%arg0: i1, %arg1: i8, %arg2 : i32, %arg3 : f32) -&gt; (i1, i8, i32, f32) {\n  // INPUT-SCALARS: result[0]: i32=1\n  // INPUT-SCALARS: result[1]: i32=5\n  // INPUT-SCALARS: result[2]: i32=1234\n  // INPUT-SCALARS: result[3]: f32=-3.14\n  return %arg0, %arg1, %arg2, %arg3 : i1, i8, i32, f32\n}\n\n// -----\n\n// Buffers (\"tensors\") use the form `--input=[shape]xtype=[value]`.\n//   * If any values are omitted, zeroes will be used.\n//   * Quotes should be used around values with spaces.\n//   * Brackets may also be used to separate element values.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=- \\\n// RUN:                  --function=buffers \\\n// RUN:                  --input=i32=5 \\\n// RUN:                  --input=2xi32 \\\n// RUN:                  --input=\"2x3xi32=1 2 3 4 5 6\") | \\\n// RUN: FileCheck --check-prefix=INPUT-BUFFERS %s\n// INPUT-BUFFERS-LABEL: EXEC @buffers\nfunc.func @buffers(%arg0: tensor&lt;i32&gt;, %arg1: tensor&lt;2xi32&gt;, %arg2: 
tensor&lt;2x3xi32&gt;) -&gt; (tensor&lt;i32&gt;, tensor&lt;2xi32&gt;, tensor&lt;2x3xi32&gt;) {\n  // INPUT-BUFFERS: result[0]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: i32=5\n  // INPUT-BUFFERS: result[1]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: 2xi32=0 0\n  // INPUT-BUFFERS: result[2]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: 2x3xi32=[1 2 3][4 5 6]\n  return %arg0, %arg1, %arg2 : tensor&lt;i32&gt;, tensor&lt;2xi32&gt;, tensor&lt;2x3xi32&gt;\n}\n\n// -----\n\n// Buffer values can be read from binary files with `@some/file.bin`.\n//   * numpy npy files from numpy.save or previous tooling output can be read to\n//     provide 1+ values.\n//   * Some data types may be converted (i32 -&gt; si32 here) - bug?\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s -o=%t.vmfb &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=npy_round_trip \\\n// RUN:                  --input=2xi32=11,12 \\\n// RUN:                  --input=3xi32=1,2,3 \\\n// RUN:                  --output=@%t.npy \\\n// RUN:                  --output=+%t.npy &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=npy_round_trip \\\n// RUN:                  --input=*%t.npy) | \\\n// RUN: FileCheck --check-prefix=INPUT-NUMPY %s\n\n// INPUT-NUMPY-LABEL: EXEC @npy_round_trip\nfunc.func @npy_round_trip(%arg0: tensor&lt;2xi32&gt;, %arg1: tensor&lt;3xi32&gt;) -&gt; (tensor&lt;2xi32&gt;, tensor&lt;3xi32&gt;) {\n  // INPUT-NUMPY: result[0]: hal.buffer_view\n  // INPUT-NUMPY-NEXT: 2xsi32=11 12\n  // INPUT-NUMPY: result[1]: hal.buffer_view\n  // INPUT-NUMPY-NEXT: 3xsi32=1 2 3\n  return %arg0, %arg1 : tensor&lt;2xi32&gt;, tensor&lt;3xi32&gt;\n}\n</code></pre> <p>Source file: <code>tools/test/iree-run-module-outputs.mlir</code></p> tools/test/iree-run-module-outputs.mlir<pre><code>// Tests that execution providing no outputs 
is ok.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=no_output) | \\\n// RUN: FileCheck --check-prefix=NO-OUTPUT %s\n// NO-OUTPUT-LABEL: EXEC @no_output\nfunc.func @no_output() {\n  return\n}\n\n// -----\n\n// Tests the default output printing to stdout.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=default) | \\\n// RUN: FileCheck --check-prefix=OUTPUT-DEFAULT %s\n// OUTPUT-DEFAULT-LABEL: EXEC @default\nfunc.func @default() -&gt; (i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  // OUTPUT-DEFAULT: result[0]: i32=123\n  %0 = arith.constant 123 : i32\n  // OUTPUT-DEFAULT: result[1]: hal.buffer_view\n  // OUTPUT-DEFAULT-NEXT: f32=4\n  %1 = arith.constant dense&lt;4.0&gt; : tensor&lt;f32&gt;\n  // OUTPUT-DEFAULT: result[2]: hal.buffer_view\n  // OUTPUT-DEFAULT-NEXT: 2x4xi32=[0 1 2 3][4 5 6 7]\n  %2 = flow.tensor.dynamic_constant dense&lt;[[0,1,2,3],[4,5,6,7]]&gt; : tensor&lt;2x4xi32&gt; -&gt; tensor&lt;?x4xi32&gt;\n  return %0, %1, %2 : i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\n\n// -----\n\n// Tests explicit output to npy files by producing a concatenated .npy and then\n// printing the results in python. 
This also verifies our npy files can be\n// parsed by numpy.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=numpy \\\n// RUN:                  --output= \\\n// RUN:                  --output=@%t.npy \\\n// RUN:                  --output=+%t.npy) &amp;&amp; \\\n// RUN:  \"%PYTHON\" %S/echo_npy.py %t.npy | \\\n// RUN: FileCheck --check-prefix=OUTPUT-NUMPY %s\nfunc.func @numpy() -&gt; (i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  // Output skipped:\n  %0 = arith.constant 123 : i32\n  // OUTPUT-NUMPY{LITERAL}: 4.0\n  %1 = arith.constant dense&lt;4.0&gt; : tensor&lt;f32&gt;\n  // OUTPUT-NUMPY-NEXT{LITERAL}: [[0 1 2 3]\n  // OUTPUT-NUMPY-NEXT{LITERAL}:  [4 5 6 7]]\n  %2 = flow.tensor.dynamic_constant dense&lt;[[0,1,2,3],[4,5,6,7]]&gt; : tensor&lt;2x4xi32&gt; -&gt; tensor&lt;?x4xi32&gt;\n  return %0, %1, %2 : i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\n\n// -----\n\n// Tests output to binary files by round-tripping the output of a function into\n// another invocation reading from the binary files. Each output is written to\n// its own file (optimal for alignment/easier to inspect).\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s -o=%t.vmfb &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=write_binary \\\n// RUN:                  --output=@%t.0.bin \\\n// RUN:                  --output=@%t.1.bin &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=echo_binary \\\n// RUN:                  --input=f32=@%t.0.bin \\\n// RUN:                  --input=2x4xi32=@%t.1.bin) | \\\n// RUN: FileCheck --check-prefix=OUTPUT-BINARY %s\n\n// Tests output to binary files by round-tripping the output of a function into\n// another invocation reading from the binary files. 
The values are appended to\n// a single file and read from the single file.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s -o=%t.vmfb &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=write_binary \\\n// RUN:                  --output=@%t.bin \\\n// RUN:                  --output=+%t.bin &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=echo_binary \\\n// RUN:                  --input=f32=@%t.bin \\\n// RUN:                  --input=2x4xi32=+%t.bin) | \\\n// RUN: FileCheck --check-prefix=OUTPUT-BINARY %s\n\nfunc.func @write_binary() -&gt; (tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  %0 = arith.constant dense&lt;4.0&gt; : tensor&lt;f32&gt;\n  %1 = flow.tensor.dynamic_constant dense&lt;[[0,1,2,3],[4,5,6,7]]&gt; : tensor&lt;2x4xi32&gt; -&gt; tensor&lt;?x4xi32&gt;\n  return %0, %1 : tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\nfunc.func @echo_binary(%arg0: tensor&lt;f32&gt;, %arg1: tensor&lt;?x4xi32&gt;) -&gt; (tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  // OUTPUT-BINARY{LITERAL}: f32=4\n  // OUTPUT-BINARY{LITERAL}: 2x4xi32=[0 1 2 3][4 5 6 7]\n  return %arg0, %arg1 : tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\n</code></pre> <p>Source file: <code>tools/test/iree-run-module-expected.mlir</code></p> tools/test/iree-run-module-expected.mlir<pre><code>// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=f32=2.0) | FileCheck %s --check-prefix=SUCCESS-MATCHES\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=\"(ignored)\") | FileCheck %s --check-prefix=SUCCESS-IGNORED\n// RUN: (iree-compile 
--iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=f32=2.1 --expected_f32_threshold=0.1) | FileCheck %s --check-prefix=SUCCESS-THRESHOLD\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | not iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=123 --expected_output=f32=2.0) | FileCheck %s --check-prefix=FAILED-FIRST\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | not iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=f32=4.5) | FileCheck %s --check-prefix=FAILED-SECOND\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | not iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=4xf32=2.0) | FileCheck %s --check-prefix=FAILED-SHAPE\n\n// SUCCESS-MATCHES: [SUCCESS]\n// SUCCESS-THRESHOLD: [SUCCESS]\n// SUCCESS-IGNORED: [SUCCESS]\n// FAILED-FIRST: [FAILED] result[0]: element at index 0 (-2) does not match the expected (123)\n// FAILED-SECOND: [FAILED] result[1]: element at index 0 (2) does not match the expected (4.5)\n// FAILED-SHAPE: [FAILED] result[1]: metadata is f32; expected that the view matches 4xf32\n\nfunc.func @abs(%input: tensor&lt;f32&gt;) -&gt; (tensor&lt;f32&gt;, tensor&lt;f32&gt;) {\n  %result = math.absf %input : tensor&lt;f32&gt;\n  return %input, %result : tensor&lt;f32&gt;, tensor&lt;f32&gt;\n}\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-check-module","title":"iree-check-module","text":"<p>The <code>iree-check-module</code> program takes an already translated IREE module as input and executes it as a series of googletest tests. 
This is the test runner for the IREE check framework.</p> <pre><code>$ ../iree-build/tools/iree-compile \\\n  --iree-input-type=stablehlo \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/tests/e2e/stablehlo_ops/abs.mlir \\\n  -o /tmp/abs.vmfb\n</code></pre> <pre><code>$ ../iree-build/tools/iree-check-module \\\n  --device=local-task \\\n  --module=/tmp/abs.vmfb\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-run-mlir","title":"iree-run-mlir","text":"<p>The <code>iree-run-mlir</code> program takes a <code>.mlir</code> file as input, translates it to an IREE bytecode module, and executes the module.</p> <p>It is designed for testing and debugging, not production uses, and therefore does some additional work that usually must be explicit, like marking every function as exported by default and running all of them.</p> <p>For example, to execute the contents of samples/models/simple_abs.mlir:</p> <pre><code># iree-run-mlir &lt;compiler flags&gt; [input.mlir] &lt;runtime flags&gt;\n$ ../iree-build/tools/iree-run-mlir \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/samples/models/simple_abs.mlir \\\n  --input=f32=-2\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-dump-module","title":"iree-dump-module","text":"<p>The <code>iree-dump-module</code> program prints the contents of an IREE module FlatBuffer file.</p> <p>For example, to inspect the module translated above:</p> <pre><code>../iree-build/tools/iree-dump-module /tmp/simple_abs_vmvx.vmfb\n</code></pre>"},{"location":"developers/general/developer-overview/#useful-generic-flags","title":"Useful generic flags","text":""},{"location":"developers/general/developer-overview/#read-inputs-from-a-file","title":"Read inputs from a file","text":"<p>All the IREE tools support reading input values from a file. This is quite useful for debugging. Use <code>--help</code> for each tool to see which flag to set. The inputs are expected to be newline-separated. 
Each input should be either a scalar or a buffer. Scalars should be in the format <code>type=value</code> and buffers should be in the format <code>[shape]xtype=[value]</code>. For example:</p> <pre><code>1x5xf32=1,-2,-3,4,-5\n1x5x3x1xf32=15,14,13,12,11,10,9,8,7,6,5,4,3,2,1\n</code></pre>"},{"location":"developers/general/developer-overview/#-iree-flow-trace-dispatch-tensors","title":"<code>--iree-flow-trace-dispatch-tensors</code>","text":"<p>This flag will enable tracing inputs and outputs for each dispatch function. This makes it easier to narrow down test cases, since IREE breaks an ML workload into multiple dispatch functions. When the flag is on, IREE will insert trace points before and after each dispatch function. The first trace op is for inputs, and the second trace op is for outputs. There will be two events for one dispatch function.</p>"},{"location":"developers/general/developer-tips/","title":"Developer tips and tricks","text":"<p>The IREE compiler is built using MLIR, so it naturally supports the common MLIR debugging workflows. For areas where IREE differentiates itself, this page lists other helpful tips and tricks.</p>"},{"location":"developers/general/developer-tips/#setting-compiler-options","title":"Setting compiler options","text":"<p>Tools such as <code>iree-compile</code> take options via command-line flags. 
Pass <code>--help</code> to see the full list:</p> <pre><code>$ iree-compile --help\n\nOVERVIEW: IREE compilation driver\n\nUSAGE: iree-compile [options] &lt;input file or '-' for stdin&gt;\n\nOPTIONS:\n  ...\n</code></pre> <p>Tip - Options and the Python bindings</p> <p>If you are using the Python bindings, options can be passed via the <code>extra_args=[\"--flag\"]</code> argument:</p> <pre><code>import iree.compiler as ireec\n\ninput_mlir = \"\"\"\nfunc.func @abs(%input : tensor&lt;f32&gt;) -&gt; (tensor&lt;f32&gt;) {\n  %result = math.absf %input : tensor&lt;f32&gt;\n  return %result : tensor&lt;f32&gt;\n}\"\"\"\n\ncompiled_module = ireec.tools.compile_str(\n    input_mlir,\n    target_backends=[\"llvm-cpu\"],\n    extra_args=[\"--mlir-timing\"])\n</code></pre>"},{"location":"developers/general/developer-tips/#inspecting-vmfb-files","title":"Inspecting <code>.vmfb</code> files","text":"<p>The IREE compiler generates FlatBuffer files using the <code>.vmfb</code> file extension, short for \"Virtual Machine FlatBuffer\", which can then be loaded and executed using IREE's runtime.</p> Info - other output formats <p>The IREE compiler can output different formats with the <code>--output-format=</code> flag:</p> Flag value Output <code>--output-format=vm-bytecode</code> (default) VM Bytecode (<code>.vmfb</code>) files <code>--output-format=vm-c</code> C source modules <p>VM Bytecode files are usable across a range of deployment scenarios, while C source modules provide low level connection points for constrained environments like bare metal platforms.</p> <p>By default, <code>.vmfb</code> files can be opened as zip files: (1)</p> <ol> <li>Setting <code>--iree-vm-emit-polyglot-zip=false</code> will disable this feature and    decrease file size slightly</li> </ol> <pre><code>$ unzip -d simple_abs_cpu ./simple_abs_cpu.vmfb\n\nArchive:  ./simple_abs_cpu.vmfb\n  extracting: simple_abs_cpu/module.fb\n  extracting: 
simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n</code></pre> <p>The embedded binary (here an ELF shared object with CPU code) can be parsed by standard tools:</p> <pre><code>$ readelf -Ws ./simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n\nSymbol table '.dynsym' contains 2 entries:\n  Num:    Value          Size Type    Bind   Vis      Ndx Name\n    0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND\n    1: 0000000000001760    17 FUNC    GLOBAL DEFAULT    7 iree_hal_executable_library_query\n\nSymbol table '.symtab' contains 42 entries:\n  Num:    Value          Size Type    Bind   Vis      Ndx Name\n    0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND\n    1: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS abs_dispatch_0\n    2: 0000000000001730    34 FUNC    LOCAL  DEFAULT    7 abs_dispatch_0_generic\n    3: 00000000000034c0    80 OBJECT  LOCAL  DEFAULT    8 iree_hal_executable_library_query_v0\n    4: 0000000000001780   111 FUNC    LOCAL  DEFAULT    7 iree_h2f_ieee\n    5: 00000000000017f0   207 FUNC    LOCAL  DEFAULT    7 iree_f2h_ieee\n    ...\n</code></pre> <p>The <code>iree-dump-module</code> tool can also be used to see information about a given <code>.vmfb</code> file:</p> <pre><code>$ iree-dump-module simple_abs.vmfb\n\n//===---------------------------------------------------------------------===//\n// @module : version 0\n//===---------------------------------------------------------------------===//\n\nRequired Types:\n  [  0] i32\n  [  1] i64\n  [  2] !hal.allocator\n  [  3] !hal.buffer\n  ...\n\nModule Dependencies:\n  hal, version &gt;= 0, required\n\nImported Functions:\n  [  0] hal.allocator.allocate(!vm.ref&lt;?&gt;, i32, i32, i64) -&gt; (!vm.ref&lt;?&gt;)\n  [  1] hal.devices.get(i32) -&gt; (!vm.ref&lt;?&gt;)\n  ...\n\nExported Functions:\n  [  0] abs(!vm.ref&lt;?&gt;) -&gt; (!vm.ref&lt;?&gt;)\n  [  1] __init() -&gt; 
()\n\n...\n</code></pre>"},{"location":"developers/general/developer-tips/#dumping-executable-files","title":"Dumping executable files","text":"<p>The <code>--iree-hal-dump-executable-*</code> flags instruct the compiler to save files related to \"executable translation\" (code generation for a specific hardware target) into a directory of your choosing. If you are interested in seeing which operations in your input program were fused into a compute kernel or what device code was generated for a given program structure, these flags are a great starting point.</p> Flag Files dumped <code>iree-hal-dump-executable-files-to</code> All files (meta-flag) <code>iree-hal-dump-executable-sources-to</code> Source <code>.mlir</code> files prior to HAL compilation <code>iree-hal-dump-executable-intermediates-to</code> Intermediate files (e.g. <code>.o</code> files, <code>.mlir</code> stages) <code>iree-hal-dump-executable-binaries-to</code> Binary files (e.g. <code>.so</code>, <code>.spv</code>, <code>.ptx</code>), as used in the <code>.vmfb</code> <code>iree-hal-dump-executable-benchmarks-to</code> Standalone benchmark files for <code>iree-benchmark-module</code> CPUGPU - VulkanGPU - CUDA <pre><code>$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-llvmcpu-link-embedded=false \\\n  --iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_cpu.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_system_elf_x86_64_benchmark.mlir\nmodule_abs_dispatch_0_system_elf_x86_64.codegen.bc\nmodule_abs_dispatch_0_system_elf_x86_64.linked.bc\nmodule_abs_dispatch_0_system_elf_x86_64.optimized.bc\nmodule_abs_dispatch_0_system_elf_x86_64.o\nmodule_abs_dispatch_0_system_elf_x86_64.s\nmodule_abs_dispatch_0_system_elf_x86_64.so\nsimple_abs_cpu.vmfb\n</code></pre> <p>Tip - Embedded and system linking</p> <p>The default value of 
<code>--iree-llvmcpu-link-embedded=true</code> generates embedded ELF files. By disabling that flag, the compiler will produce platform-standard <code>.so</code> files for Linux, <code>.dll</code> files for Windows, etc. While embedded ELF files can be smaller and more portable, inspection of artifacts is easier with platform-standard shared object files.</p> Tip - Disassembling <code>.bc</code> files with <code>llvm-dis</code> <p>The <code>.bc</code> intermediate files use the LLVM BitCode format, which can be disassembled using <code>llvm-dis</code>:</p> <pre><code>// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_system_elf_x86_64.codegen.bc\n$ cat module_abs_dispatch_0_system_elf_x86_64.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_system_elf_x86_64.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\ntarget triple = \"x86_64-linux-gnu\"\n\n%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 }\n%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 }\n\n...\n\ndefine internal i32 @abs_dispatch_0_generic(\n    ptr noalias nonnull align 16 %0,\n    ptr noalias nonnull align 16 %1,\n    ptr noalias nonnull align 16 %2) #0 {\n  %4 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n  %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t %4, 10,\n  %6 = load ptr, ptr %5, align 8,\n  %7 = ptrtoint ptr %6 to i64,\n  %8 = and i64 %7, 63,\n  %9 = icmp eq i64 %8, 0,\n  call void @llvm.assume(i1 %9),\n  %10 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n  %11 = extractvalue %iree_hal_executable_dispatch_state_v0_t %10, 10,\n  %12 = getelementptr ptr, ptr %11, i32 1,\n  %13 = load ptr, ptr %12, align 8,\n  %14 = ptrtoint ptr %13 to i64,\n  %15 = and i64 %14, 63,\n  %16 = icmp eq i64 %15, 0,\n  call void @llvm.assume(i1 %16),\n  %17 = load float, ptr %6, align 4,\n  
%18 = call float @llvm.fabs.f32(float %17),\n  store float %18, ptr %13, align 4,\n  ret i32 0,\n}\n\n...\n</code></pre> <pre><code>$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=vulkan-spirv \\\n  --iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_vulkan.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb_benchmark.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.spv\nsimple_abs_vulkan.vmfb\n</code></pre> Tip - Disassembling <code>.spv</code> files with <code>spirv-dis</code> <p>The <code>.spv</code> files use the SPIR-V binary format, which can be disassembled using <code>spirv-dis</code> from SPIR-V Tools:</p> <pre><code>$ cd /tmp/iree/simple_abs/\n$ spirv-dis module_abs_dispatch_0_vulkan_spirv_fb.spv\n\n; SPIR-V\n; Version: 1.0\n; Generator: Khronos; 22\n; Bound: 20\n; Schema: 0\n              OpCapability Shader\n              OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n        %18 = OpExtInstImport \"GLSL.std.450\"\n              OpMemoryModel Logical GLSL450\n              OpEntryPoint GLCompute %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n              OpExecutionMode %abs_dispatch_0_generic LocalSize 1 1 1\n              OpName %__resource_var_0_0_ \"__resource_var_0_0_\"\n              OpName %__resource_var_0_1_ \"__resource_var_0_1_\"\n              OpName %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n              OpDecorate %_arr_float_uint_1 ArrayStride 4\n              OpMemberDecorate %_struct_2 0 Offset 0\n              OpDecorate %_struct_2 Block\n              OpDecorate %__resource_var_0_0_ Binding 0\n              OpDecorate %__resource_var_0_0_ DescriptorSet 0\n              OpDecorate %__resource_var_0_1_ Binding 1\n              OpDecorate %__resource_var_0_1_ DescriptorSet 0\n      %float = OpTypeFloat 32\n      %uint = OpTypeInt 
32 0\n    %uint_1 = OpConstant %uint 1\n%_arr_float_uint_1 = OpTypeArray %float %uint_1\n  %_struct_2 = OpTypeStruct %_arr_float_uint_1\n%_ptr_StorageBuffer__struct_2 = OpTypePointer StorageBuffer %_struct_2\n%__resource_var_0_0_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n%__resource_var_0_1_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n      %void = OpTypeVoid\n          %9 = OpTypeFunction %void\n    %uint_0 = OpConstant %uint 0\n%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float\n%abs_dispatch_0_generic = OpFunction %void None %9\n        %12 = OpLabel\n        %15 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_0_ %uint_0 %uint_0\n        %16 = OpLoad %float %15\n        %17 = OpExtInst %float %18 FAbs %16\n        %19 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_1_ %uint_0 %uint_0\n              OpStore %19 %17\n              OpReturn\n              OpFunctionEnd\n</code></pre> <pre><code>$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=cuda \\\n  --iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_cuda.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0_cuda_nvptx_fb_benchmark.mlir\nmodule_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.linked.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.optimized.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.ptx\nmodule_abs_dispatch_0.mlir\nsimple_abs_cuda.vmfb\n</code></pre> Tip - Disassembling <code>.bc</code> files with <code>llvm-dis</code> <p>The <code>.bc</code> intermediate files use the LLVM BitCode format, which can be disassembled using <code>llvm-dis</code>:</p> <pre><code>// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\n$ cat 
module_abs_dispatch_0_cuda_nvptx_fb.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\n\ndeclare ptr @malloc(i64)\n\ndeclare void @free(ptr)\n\ndeclare float @__nv_fabsf(float)\n\ndefine void @abs_dispatch_0_generic(ptr noalias readonly align 16 %0, ptr noalias align 16 %1) {\n  %3 = ptrtoint ptr %0 to i64\n  %4 = and i64 %3, 63\n  %5 = icmp eq i64 %4, 0\n  call void @llvm.assume(i1 %5)\n  %6 = ptrtoint ptr %1 to i64\n  %7 = and i64 %6, 63\n  %8 = icmp eq i64 %7, 0\n  call void @llvm.assume(i1 %8)\n  %9 = load float, ptr %0, align 4\n  %10 = call float @__nv_fabsf(float %9)\n  store float %10, ptr %1, align 4\n  ret void\n}\n\n!nvvm.annotations = !{!0, !1, !2, !3}\n\n!0 = !{ptr @abs_dispatch_0_generic, !\"kernel\", i32 1}\n!1 = !{ptr @abs_dispatch_0_generic, !\"maxntidx\", i32 1}\n!2 = !{ptr @abs_dispatch_0_generic, !\"maxntidy\", i32 1}\n!3 = !{ptr @abs_dispatch_0_generic, !\"maxntidz\", i32 1}\n</code></pre>"},{"location":"developers/general/developer-tips/#module-level-executable-benchmarks","title":"Module level executable benchmarks","text":"<p>The benchmark files produced by <code>--iree-hal-dump-executable-benchmarks-to</code> can be compiled in isolation and passed to <code>iree-benchmark-module</code>, where they exercise the full IREE runtime for a single executable:</p> <pre><code>$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-hal-dump-executable-benchmarks-to=/tmp/iree/simple_abs/ \\\n  -o /dev/null\n\n$ iree-compile \\\n  /tmp/iree/simple_abs/module_abs_dispatch_0_embedded_elf_x86_64_benchmark.mlir \\\n  -o /tmp/iree/simple_abs/module_abs_dispatch_0_benchmark.vmfb\n\n$ iree-benchmark-module \\\n  /tmp/iree/simple_abs/module_abs_dispatch_0_benchmark.vmfb\n</code></pre>"},{"location":"developers/general/developer-tips/#low-level-executable-binary-benchmarks","title":"Low level executable binary benchmarks","text":"<p>The binary files produced by 
<code>--iree-hal-dump-executable-binaries-to</code> can be passed to <code>iree-benchmark-executable</code> where they are benchmarked directly, without using the IREE VM, HAL APIs, task system, etc. Note that this interface is much lower level and you must specify all push constants / binding parameters manually:</p> <pre><code>$ iree-compile \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-hal-dump-executable-binaries-to=/tmp/iree/simple_abs/ \\\n  -o /dev/null\n\n$ iree-benchmark-executable \\\n  --device=local-sync \\\n  --executable_format=embedded-elf-x86_64 \\\n  --executable_file=/tmp/iree/simple_abs/module_abs_dispatch_0_embedded_elf_x86_64.so \\\n  --entry_point=0 \\\n  --binding=f32=-2.5 \\\n  --binding=f32=0 \\\n  --workgroup_count=1,1,1\n</code></pre> <p>See the comments in <code>tools/iree-benchmark-executable-main.c</code> and the test file at <code>tools/test/iree-benchmark-executable.mlir</code> for more information and examples.</p>"},{"location":"developers/general/developer-tips/#compiling-phase-by-phase","title":"Compiling phase by phase","text":"<p>IREE compiles programs through a series of broad phases:</p> <pre><code>graph LR\n  accTitle: Compilation phases overview\n  accDescr: Input to ABI to Flow to Stream to HAL to VM\n\n  A([Input])\n  A --&gt; B([ABI])\n  B --&gt; C([Flow])\n  C --&gt; D([Stream])\n  D --&gt; E([HAL])\n  E --&gt; F([VM])</code></pre> Tip - available phases <p>These are the phase names available for use with the <code>--compile-to</code> and <code>--compile-from</code> flags described below:</p> Phase name Description <code>input</code> Performs input processing and lowering into core IREE input dialects (linalg/etc) <code>abi</code> Adjusts the program ABI for the specified execution environment <code>preprocessing</code> Applies customizable <code>preprocessing</code> prior to Flow/Stream/HAL/VM <code>flow</code> Models execution data flow and partitioning using the <code>flow</code> dialect <code>stream</code> 
Models execution partitioning and scheduling using the <code>stream</code> dialect <code>executable-sources</code> Prepares <code>hal</code> dialect executables for translation, prior to codegen <code>executable-targets</code> Runs code generation for <code>hal</code> dialect executables <code>hal</code> Finishes <code>hal</code> dialect processing <code>vm</code> Lowers to IREE's abstract virtual machine using the <code>vm</code> dialect <code>end</code> Completes the full compilation pipeline <p>For an accurate list of phases, see the source code or check the help output with a command such as:</p> <pre><code>iree-compile --help | sed -n '/--compile-to/,/--/p' | head -n -1\n</code></pre> <p>You can output a program snapshot at intermediate phases with the <code>--compile-to=&lt;phase name&gt;</code> flag:</p> <pre><code>$ cat simple_abs.mlir\n\nfunc.func @abs(%input : tensor&lt;f32&gt;) -&gt; (tensor&lt;f32&gt;) {\n  %result = math.absf %input : tensor&lt;f32&gt;\n  return %result : tensor&lt;f32&gt;\n}\n\n$ iree-compile simple_abs.mlir --compile-to=abi\n\nmodule {\n  func.func @abs(%arg0: !hal.buffer_view) -&gt; !hal.buffer_view attributes {iree.abi.stub} {\n    %0 = hal.tensor.import %arg0 \"input 0\" : !hal.buffer_view -&gt; tensor&lt;f32&gt;\n    %1 = math.absf %0 : tensor&lt;f32&gt;\n    %2 = hal.tensor.export %1 \"output 0\" : tensor&lt;f32&gt; -&gt; !hal.buffer_view\n    return %2 : !hal.buffer_view\n  }\n}\n</code></pre> <p>This is similar to the <code>--mlir-print-ir-after=</code> flag, but at clearly defined pipeline phases.</p> <p>Compilation can be continued from any intermediate phase. 
This allows for iterative workflows - compile to a phase, make edits to the <code>.mlir</code> file, then resume compilation and continue through the pipeline:</p> <pre><code>$ iree-compile simple_abs.mlir --compile-to=abi -o simple_abs_abi.mlir\n\n$ sed \\\n  -e 's/math.absf/math.exp/' \\\n  -e 's/@abs/@exp/' \\\n  simple_abs_abi.mlir &gt; simple_exp_abi.mlir\n\n$ iree-compile simple_exp_abi.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  -o simple_exp_cpu.vmfb\n</code></pre> <p>or explicitly resume from an intermediate phase with <code>--compile-from=&lt;phase name&gt;</code>:</p> <pre><code>$ iree-compile simple_exp_abi.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --compile-from=abi \\\n  -o simple_exp_cpu.vmfb\n</code></pre>"},{"location":"developers/general/developer-tips/#dumping-compilation-phases","title":"Dumping compilation phases","text":"<p>The <code>--dump-compilation-phases-to</code> flag can be used to dump program IR after each phase, much like <code>--compile-to</code> but without exiting early:</p> <pre><code>$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --dump-compilation-phases-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_cpu.vmfb\n\n$ ls /tmp/iree/simple_abs -1v\n\nsimple_abs.1.input.mlir\nsimple_abs.2.abi.mlir\nsimple_abs.3.preprocessing.mlir\nsimple_abs.4.global-optimization.mlir\nsimple_abs.5.flow.mlir\nsimple_abs.6.stream.mlir\nsimple_abs.7.executable-sources.mlir\nsimple_abs.8.executable-configurations.mlir\nsimple_abs.9.executable-targets.mlir\nsimple_abs.10.hal.mlir\nsimple_abs.11.vm.mlir\n</code></pre> <p>As with <code>--compile-to</code>, these files can be used together with <code>--compile-from</code>:</p> <pre><code>$ iree-compile simple_abs.2.abi.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --compile-from=abi \\\n  -o simple_exp_cpu.vmfb\n</code></pre> <p>All together, these passes can be used to, for example:</p> <ul> <li>speed up triage (\"at which phase 
do we go wrong\")</li> <li>allow for faster development iteration (snapshot all phases at some baseline,   modify the compiler source, then resume from just before where those changes   impact a pipeline)</li> </ul>"},{"location":"developers/general/release-management/","title":"Release management","text":"<p>IREE cuts automated releases via a workflow that is triggered daily. The only constraint placed on the commit that is released is that it has passed all CI checks. These are published on GitHub with the \"pre-release\" status. For debugging this process, see the Release debugging playbook.</p> <p>We periodically promote one of these candidates to a \"stable\" release by removing the \"pre-release\" status. This makes it show up as a \"latest\" release on GitHub. We also push the Python packages for this release to PyPI.</p>"},{"location":"developers/general/release-management/#picking-a-candidate-to-promote","title":"Picking a candidate to promote","text":"<p>When selecting a candidate we use the following criteria:</p> <ol> <li>\u2a864 days old so that problems with it may have been spotted</li> <li>Contains no P0 regressions vs the previous stable release</li> <li>LLVM submodule commit ideally exists upstream (no cherry picks or patches)</li> </ol> <p>When you've identified a potential candidate, email the iree-discuss list with the proposal and solicit feedback. People may point out known regressions or request that some feature make the cut.</p>"},{"location":"developers/general/release-management/#promoting-a-candidate-to-stable","title":"Promoting a candidate to stable","text":"<ol> <li> <p>(Authorized users only) Push to PyPI using     pypi_deploy.sh</p> <ul> <li>For Googlers, the password is stored at http://go/iree-pypi-password</li> </ul> </li> <li> <p>Open the release on GitHub. 
Rename the release from \"candidate\" to \"stable\",     uncheck the option for \"pre-release\", and check the option for \"latest\".</p> <p></p> <p></p> </li> </ol>"},{"location":"developers/general/testing-guide/","title":"Testing guide","text":"<p>Like the IREE project in general, IREE tests are divided into a few different components and use different tooling depending on the needs of that component.</p> Test type Test Build system Supported platforms Compiler tests iree_lit_test Bazel/CMake Host Runtime tests iree_cc_test Bazel/CMake Host/Device iree_native_test Bazel/CMake Host/Device iree_hal_cts_test_suite CMake Host/Device Core E2E tests iree_check_test Bazel/CMake Host/Device iree_static_linker_test CMake Host/Device <p>There are also more <code>*_test_suite</code> targets that group test targets with the same configuration together.</p>"},{"location":"developers/general/testing-guide/#compiler-tests","title":"Compiler tests","text":"<p>Tests for the IREE compilation pipeline are written as lit tests in the same style as MLIR.</p> <p>By convention, IREE includes tests for</p> <ul> <li>printing and parsing of ops in <code>.../IR/test/{OP_CATEGORY}_ops.mlir</code> files</li> <li>folding and canonicalization in <code>.../IR/test/{OP_CATEGORY}_folding.mlir</code> files</li> <li>compiler passes and pipelines in other <code>.../test/*.mlir</code> files</li> </ul>"},{"location":"developers/general/testing-guide/#running-a-test","title":"Running a test","text":"<p>For the test <code>iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir</code></p> <p>With CMake, run this from the build directory:</p> <pre><code>ctest -R iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir.test\n</code></pre> <p>With Bazel, run this from the repo root:</p> <pre><code>bazel test 
//compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/test:arithmetic_ops.mlir.test\n</code></pre>"},{"location":"developers/general/testing-guide/#writing-a-test","title":"Writing a test","text":"<p>For advice on writing MLIR compiler tests, see the MLIR testing guide. Tests should be <code>.mlir</code> files in <code>test</code> directory adjacent to the functionality they are testing. Instead of <code>mlir-opt</code>, use <code>iree-opt</code>, which registers IREE dialects and passes and doesn't register some unnecessary core ones.</p> <p>As with most parts of the IREE compiler, these should not have a dependency on the runtime.</p>"},{"location":"developers/general/testing-guide/#configuring-the-build-system","title":"Configuring the build system","text":"<p>In the Bazel BUILD file, create a <code>iree_lit_test_suite</code> rule. We usually create a single suite that globs all <code>.mlir</code> files in the directory and is called \"lit\".</p> <pre><code>load(\"//iree/build_tools/bazel:iree_lit_test.bzl\", \"iree_lit_test_suite\")\n\niree_lit_test_suite(\n    name = \"lit\",\n    srcs = glob([\"*.mlir\"]),\n    tools = [\n        \"@llvm-project//llvm:FileCheck\",\n        \"//tools:iree-opt\",\n    ],\n)\n</code></pre> <p>There is a corresponding CMake function, calls to which will be generated by our Bazel to CMake converter.</p> <pre><code>iree_lit_test_suite(\n  NAME\n    lit\n  SRCS\n    \"arithmetic_ops.mlir\"\n  DATA\n    FileCheck\n    iree-opt\n)\n</code></pre> <p>You can also create a test for a single file with <code>iree_lit_test</code>.</p>"},{"location":"developers/general/testing-guide/#runtime-tests","title":"Runtime tests","text":"<p>Tests for the runtime C++ code use the GoogleTest testing framework. 
They should generally follow the style and best practices of that framework.</p>"},{"location":"developers/general/testing-guide/#running-a-test_1","title":"Running a test","text":"<p>For the test <code>/runtime/src/iree/base/bitfield_test.cc</code>:</p> <p>With CMake, run this from the build directory:</p> <pre><code>ctest -R iree/base/bitfield_test\n</code></pre> <p>With Bazel, run this from the repo root:</p> <pre><code>bazel test //runtime/src/iree/base:bitfield_test\n</code></pre>"},{"location":"developers/general/testing-guide/#setting-test-environments","title":"Setting test environments","text":"<p>Parallel testing for <code>ctest</code> can be enabled via the <code>CTEST_PARALLEL_LEVEL</code> environment variable. For example:</p> <pre><code>export CTEST_PARALLEL_LEVEL=$(nproc)\n</code></pre> <p>To use the Vulkan backend as test driver, you may need to select between a Vulkan implementation from SwiftShader and multiple Vulkan-capable hardware devices. This can be done via environment variables. See the generic Vulkan setup page for details regarding these variables.</p> <p>For Bazel, you can persist the configuration in <code>user.bazelrc</code> to save typing. For example:</p> <pre><code>test:vkswiftshader --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vkswiftshader --test_env=\"VK_LAYER_PATH=...\"\ntest:vknative --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vknative --test_env=\"VK_LAYER_PATH=...\"\n</code></pre> <p>Then you can use <code>bazel test --config=vkswiftshader</code> to select SwiftShader as the Vulkan implementation. Similarly for other implementations.</p>"},{"location":"developers/general/testing-guide/#writing-a-test_1","title":"Writing a test","text":"<p>For advice on writing tests in the GoogleTest framework, see the GoogleTest primer. Test files for source file <code>foo.cc</code> with build target <code>foo</code> should live in the same directory with source file <code>foo_test.cc</code> and build target <code>foo_test</code>. 
You should <code>#include</code> <code>iree/testing/gtest.h</code> instead of any of the gtest or gmock headers.</p> <p>As with all parts of the IREE runtime, these should not have a dependency on the compiler.</p>"},{"location":"developers/general/testing-guide/#configuring-the-build-system_1","title":"Configuring the build system","text":"<p>In the Bazel BUILD file, create a <code>cc_test</code> target with your test file as the source and any necessary dependencies. Usually, you can link in a standard gtest main function. Use <code>iree/testing:gtest_main</code> instead of the <code>gtest_main</code> that comes with gtest.</p> <pre><code>cc_test(\n    name = \"arena_test\",\n    srcs = [\"arena_test.cc\"],\n    deps = [\n        \":arena\",\n        \"//iree/testing:gtest_main\",\n    ],\n)\n</code></pre> <p>We have created a corresponding CMake function <code>iree_cc_test</code> that mirrors the Bazel rule's behavior. Our Bazel to CMake converter should generally derive the <code>CMakeLists.txt</code> file from the BUILD file:</p> <pre><code>iree_cc_test(\n  NAME\n    arena_test\n  SRCS\n    \"arena_test.cc\"\n  DEPS\n    ::arena\n    iree::testing::gtest_main\n)\n</code></pre> <p>There are other more specific test targets, such as <code>iree_hal_cts_test_suite</code>, which are designed to test specific runtime support with template configuration and is not supported by Bazel rules.</p>"},{"location":"developers/general/testing-guide/#iree-core-end-to-end-e2e-tests","title":"IREE core end-to-end (e2e) tests","text":"<p>Here \"end-to-end\" means from the input accepted by the IREE core compiler (dialects like TOSA, StableHLO, Linalg) to execution using the IREE runtime components. It does not include tests of the integrations with ML frameworks (e.g. TensorFlow, PyTorch) or bindings to other languages (e.g. Python).</p> <p>We avoid using the more traditional <code>lit</code> tests used elsewhere in the compiler for runtime execution tests. 
Lit tests require running the compiler tools on the test platform through shell or python scripts that act on files from a local file system. On platforms like Android, the web, and embedded systems, each of these features is either not available or is severely limited.</p> <p>Instead, to test these flows we use a custom framework called <code>check</code>. The check framework compiles test programs on the host machine into standalone test binary files that can be pushed to test devices (such as Android phones) where they run with gtest style assertions (e.g. <code>check.expect_almost_eq(lhs, rhs)</code>).</p>"},{"location":"developers/general/testing-guide/#building-e2e-tests","title":"Building e2e tests","text":"<p>The files needed by these tests are not built by default with CMake. You'll need to build the special <code>iree-test-deps</code> target to generate test files prior to running CTest (from the build directory):</p> <pre><code>cmake --build . --target iree-test-deps\n</code></pre> <p>To run e2e model tests in generated_e2e_model_tests.cmake, because of their dependencies, <code>-DIREE_BUILD_E2E_TEST_ARTIFACTS=ON</code> needs to be set when configuring CMake. 
Also see IREE Benchmark Suite Prerequisites for required packages.</p>"},{"location":"developers/general/testing-guide/#running-a-test_2","title":"Running a Test","text":"<p>For the test <code>tests/e2e/stablehlo_ops/floor.mlir</code> compiled for the VMVX target backend and running on the VMVX driver (here they match exactly, but in principle there's a many-to-many mapping from backends to drivers).</p> <p>With CMake, run this from the build directory:</p> <pre><code>ctest -R tests/e2e/stablehlo_ops/check_vmvx_local-task_floor.mlir\n</code></pre> <p>With Bazel, run this from the repo root:</p> <pre><code>bazel test tests/e2e/stablehlo_ops:check_vmvx_local-task_floor.mlir\n</code></pre>"},{"location":"developers/general/testing-guide/#setting-test-environments_1","title":"Setting test environments","text":"<p>Similarly, you can use environment variables to select Vulkan implementations for running tests as explained in the Runtime tests section.</p>"},{"location":"developers/general/testing-guide/#writing-a-test_2","title":"Writing a test","text":"<p>These tests live in <code>tests/e2e</code>. 
A single test consists of a <code>.mlir</code> source file specifying an IREE module where each exported function takes no inputs and returns no results and corresponds to a single test case.</p> <p>As an example, here are some tests for the MHLO floor operation:</p> <pre><code>func.func @tensor() {\n  %input = util.unfoldable_constant dense&lt;[0.0, 1.1, 2.5, 4.9]&gt; : tensor&lt;4xf32&gt;\n  %result = \"mhlo.floor\"(%input) : (tensor&lt;4xf32&gt;) -&gt; tensor&lt;4xf32&gt;\n  check.expect_almost_eq_const(%result, dense&lt;[0.0, 1.0, 2.0, 4.0]&gt; : tensor&lt;4xf32&gt;): tensor&lt;4xf32&gt;\n  return\n}\n\nfunc.func @scalar() {\n  %input = util.unfoldable_constant dense&lt;101.3&gt; : tensor&lt;f32&gt;\n  %result = \"mhlo.floor\"(%input) : (tensor&lt;f32&gt;) -&gt; tensor&lt;f32&gt;\n  check.expect_almost_eq_const(%result, dense&lt;101.0&gt; : tensor&lt;f32&gt;): tensor&lt;f32&gt;\n  return\n}\n\nfunc.func @negative() {\n  %input = util.unfoldable_constant dense&lt;-1.1&gt; : tensor&lt;f32&gt;\n  %result = \"mhlo.floor\"(%input) : (tensor&lt;f32&gt;) -&gt; tensor&lt;f32&gt;\n  check.expect_almost_eq_const(%result, dense&lt;-2.0&gt; : tensor&lt;f32&gt;): tensor&lt;f32&gt;\n  return\n}\n</code></pre> <p>Test cases are created in gtest for each public function exported by the module.</p> <p>Note the use of <code>util.unfoldable_constant</code> to specify test constants. If we were to use a regular constant the compiler would fold away everything at compile time and our test would not actually test the runtime. <code>unfoldable_constant</code> adds a barrier that prevents folding. To prevent folding/constant propagate on an arbitrary SSA-value you can use <code>util.optimization_barrier</code>.</p> <p>Next we use this input constant to exercise the runtime feature under test (in this case, just a single floor operation). Finally, we use a check dialect operation to make an assertion about the output. There are a few different assertion operations. 
Here we use the <code>expect_almost_eq_const</code> op: almost because we are comparing floats and want to allow for floating-point imprecision, and const because we want to compare it to a constant value. This last part is just syntactic sugar around</p> <pre><code>%expected = arith.constant dense&lt;101.0&gt; : tensor&lt;f32&gt;\ncheck.expect_almost_eq(%result, %expected) : tensor&lt;f32&gt;\n</code></pre> <p>The output of running this test looks like:</p> <pre><code>[==========] Running 4 tests from 1 test suite.\n[----------] Global test environment set-up.\n[----------] 4 tests from module\n[ RUN      ] module.tensor\n[       OK ] module.tensor (76 ms)\n[ RUN      ] module.scalar\n[       OK ] module.scalar (79 ms)\n[ RUN      ] module.double\n[       OK ] module.double (55 ms)\n[ RUN      ] module.negative\n[       OK ] module.negative (54 ms)\n[----------] 4 tests from module (264 ms total)\n\n[----------] Global test environment tear-down\n[==========] 4 tests from 1 test suite ran. (264 ms total)\n[  PASSED  ] 4 tests.\n</code></pre> <p>The \"module\" name for the test suite comes from the default name for an implicit MLIR module. To give the test suite a more descriptive name, use an explicit named top-level module in this file.</p>"},{"location":"developers/general/testing-guide/#configuring-the-build-system_2","title":"Configuring the build system","text":"<p>A single <code>.mlir</code> source file can be turned into a test target with the <code>iree_check_test</code> Bazel macro (and corresponding CMake function).</p> <pre><code>load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test\")\n\niree_check_test(\n    name = \"check_vmvx_local-task_floor.mlir\",\n    src = \"floor.mlir\",\n    driver = \"local-task\",\n    target_backend = \"vmvx\",\n)\n</code></pre> <p>The target naming convention is \"check_backend_driver_src\". 
The generated test will automatically be tagged with a \"driver=vmvx\" tag, which can help filter tests by backend (especially when many tests are generated, as below).</p> <p>Usually we want to create a suite of tests across many backends and drivers. This can be accomplished with additional macros. For a single backend/driver pair:</p> <pre><code>load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_single_backend_test_suite\")\n\niree_check_single_backend_test_suite(\n    name = \"check_vmvx_local-task\",\n    srcs = glob([\"*.mlir\"]),\n    driver = \"local-task\",\n    target_backend = \"vmvx\",\n)\n</code></pre> <p>This will generate a separate test target for each file in <code>srcs</code> with a name following the convention above as well as a Bazel test_suite called \"check_vmvx_local-task\" that will run all the generated tests.</p> <p>You can also generate suites across multiple pairs:</p> <pre><code>load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test_suite\")\n\niree_check_test_suite(\n    name = \"check\",\n    srcs = [\"success.mlir\"],\n    # Leave this argument off to run on all supported backend/driver pairs.\n    target_backends_and_drivers = [\n        (\"vmvx\", \"local-task\"),\n        (\"vulkan-spirv\", \"vulkan\"),\n    ],\n)\n</code></pre> <p>This will create a test per source file and backend/driver pair, a test suite per backend/driver pair, and a test suite, \"check\", that will run all the tests.</p> <p>The CMake functions follow a similar pattern. The calls to them are generated in our <code>CMakeLists.txt</code> file by bazel_to_cmake.</p> <p>There are other test targets that generate tests based on template configuration and platform detection, such as <code>iree_static_linker_test</code>. 
Those targets are not supported by Bazel rules at this point.</p>"},{"location":"developers/general/testing-guide/#external-test-suite","title":"External test suite","text":"<p>An out-of-tree test suite is under development at nod-ai/SHARK-TestSuite for large collections of generated tests and machine learning models that are too large to fit into the main git repository.</p> <p>Testing these programs follows several stages:</p> <pre><code>graph LR\n  Import -. \"\\n(offline)\" .-&gt; Compile\n  Compile --&gt; Run</code></pre> <p>This particular test suite treats importing (e.g. from ONNX, PyTorch, or TensorFlow) as an offline step and contains test cases organized into folders of programs, inputs, and expected outputs:</p> Sample test case directory<pre><code>test_case_name/\n  model.mlir\n  input_0.npy\n  output_0.npy\n  test_data_flags.txt\n</code></pre> Sample test_data_flags.txt<pre><code>--input=@input_0.npy\n--expected_output=@output_0.npy\n</code></pre> <ul> <li>Many model, input, and output files are too large to store directly in Git, so the external test suite also uses Git LFS and cloud storage.</li> </ul> <p>Each test case can be run using a sequence of commands like:</p> <pre><code>iree-compile model.mlir {flags} -o model.vmfb\niree-run-module --module=model.vmfb --flagfile=test_data_flags.txt\n</code></pre> <p>To run slices of the test suite, a pytest runner is included that can be configured using JSON files. 
The JSON files tested in the IREE repo itself are stored in <code>build_tools/pkgci/external_test_suite/</code>.</p> <p>For example, here is part of a config file for running ONNX tests on CPU:</p> build_tools/pkgci/external_test_suite/onnx_cpu_llvm_sync.json<pre><code>{\n  \"config_name\": \"cpu_llvm_sync\",\n  \"iree_compile_flags\": [\n    \"--iree-hal-target-backends=llvm-cpu\"\n  ],\n  \"iree_run_module_flags\": [\n    \"--device=local-sync\"\n  ],\n  \"skip_compile_tests\": [\n    \"test_dequantizelinear\",\n    \"test_slice_default_axes\"\n  ],\n  \"skip_run_tests\": [],\n  \"expected_compile_failures\": [\n    \"test_acos\",\n    \"test_acos_example\",\n    \"test_acosh\",\n    \"test_acosh_example\",\n    \"test_adagrad\",\n    \"test_adagrad_multiple\",\n</code></pre>"},{"location":"developers/general/testing-guide/#adding-new-test-cases","title":"Adding new test cases","text":"<p>To add new test cases to the external test suite:</p> <ol> <li>Import the programs you want to test into MLIR. This can be done manually or    using automation. Prefer to automate, or at least document, the process so    test cases can be regenerated later.</li> <li>Construct sets of inputs and expected outputs (as .npy or .bin files). These    can be manually authored or imported by running the program through a    reference backend.</li> <li>Group the program, inputs, and outputs together using a flagfile.</li> </ol> <p>To start running new test cases:</p> <ol> <li>Bump the commit of the test suite that is used in IREE's    <code>.github/workflows/</code> files</li> <li>Add new pytest invocations and/or config files that run the new tests</li> </ol>"},{"location":"developers/general/testing-guide/#usage-from-other-projects","title":"Usage from other projects","text":"<p>The external test suite only needs <code>iree-compile</code> and <code>iree-run-module</code> to run, so it is well suited for use in downstream projects that implement plugins for IREE. 
The <code>conftest.py</code> file can also be forked (or bypassed entirely) to further customize the test runner behavior.</p>"},{"location":"developers/performance/benchmark-suites/","title":"Benchmark suites","text":"<p>IREE Benchmark Suites is a collection of benchmarks for IREE developers to track performance improvements/regressions during development.</p> <p>The benchmark suites are run for each commit on the main branch and the results are uploaded to https://perf.iree.dev for regression analysis (for the current supported targets). On pull requests, users can add labels <code>benchmarks:*</code> to trigger the benchmark runs. The results will be compared with https://perf.iree.dev and posted in the comments.</p> <p>Information about the definitions of the benchmark suites can be found in the IREE Benchmark Suites Configurations.</p>"},{"location":"developers/performance/benchmark-suites/#running-benchmark-suites-locally","title":"Running benchmark suites locally","text":""},{"location":"developers/performance/benchmark-suites/#prerequisites","title":"Prerequisites","text":"<p>Install <code>iree-import-tf</code> and <code>iree-import-tflite</code> in your Python environment (see Tensorflow Integration and TFLite Integration).</p>"},{"location":"developers/performance/benchmark-suites/#choose-benchmark-presets","title":"Choose benchmark presets","text":"<p>IREE Benchmark Suites contain many benchmarks for different devices and model sizes, which can take lots of space and time to build all of them. So benchmarks are grouped into presets to allow building and running only a subset of them. 
The available presets are:</p> <p>Execution benchmarks:</p> <ul> <li><code>android-cpu</code>: benchmarks for mobile CPUs</li> <li><code>android-gpu</code>: benchmarks for mobile GPUs</li> <li><code>cuda</code>: benchmarks for CUDA with a small model set</li> <li><code>cuda-large</code>: benchmarks for CUDA with a large model set</li> <li><code>vulkan-nvidia</code>: benchmarks for Vulkan on NVIDIA graphics cards</li> <li><code>x86_64</code>: benchmarks for x86_64 CPUs with a small model set</li> <li><code>x86_64-large</code>: benchmarks for x86_64 with a large model set</li> </ul> <p>Compilation benchmarks (to collect compilation statistics, such as module sizes):</p> <ul> <li><code>comp-stats</code>: compilation benchmarks with a small model set</li> <li><code>comp-stats-large</code>: compilation benchmark with a large model set</li> </ul> <p>Note that <code>*-large</code> presets will download and build a few hundred GBs of artifacts.</p> <p>Set the environment variables of benchmark presets for the steps below, for example:</p> <pre><code>export EXECUTION_BENCHMARK_PRESETS=\"cuda,x86_64\"\nexport COMPILATION_BENCHMARK_PRESETS=\"comp-stats\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#build-benchmark-suites","title":"Build benchmark suites","text":"<p>Configure IREE with <code>-DIREE_BUILD_E2E_TEST_ARTIFACTS=ON</code>:</p> <pre><code>cmake -GNinja -B \"${IREE_BUILD_DIR?}\" -S \"${IREE_REPO?}\" \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  -DCMAKE_C_COMPILER=clang \\\n  -DCMAKE_CXX_COMPILER=clang++ \\\n  -DIREE_ENABLE_LLD=ON \\\n  -DIREE_BUILD_E2E_TEST_ARTIFACTS=ON\n</code></pre> <p>If you only need the imported MLIR models:</p> <pre><code>cmake --build \"${IREE_BUILD_DIR?}\" --target \\\n  iree-benchmark-import-models\n  # For large benchmarks (this will take &gt; 100G disk space)\n  # iree-benchmark-import-models-large\n</code></pre> <p>Otherwise, compile the benchmark suites and tools for benchmarking:</p> <pre><code>cmake --build 
\"${IREE_BUILD_DIR?}\" --target \\\n  iree-benchmark-suites \\\n  # If any *-large preset is enabled, also build this target:\n  # iree-benchmark-suites-large \\\n  iree-benchmark-module\nexport E2E_TEST_ARTIFACTS_DIR=\"${IREE_BUILD_DIR?}/e2e_test_artifacts\"\n</code></pre> <p>TODO(#13683): Each preset should have its own target to further reduce unnecessary builds</p>"},{"location":"developers/performance/benchmark-suites/#run-benchmarks","title":"Run benchmarks","text":"<p>Export the execution benchmark config:</p> <pre><code>build_tools/benchmarks/export_benchmark_config.py execution \\\n  --benchmark_presets=\"${EXECUTION_BENCHMARK_PRESETS?}\" \\\n  &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n</code></pre> <p>Run benchmarks (currently only support running on a Linux host):</p> <pre><code>build_tools/benchmarks/run_benchmarks_on_linux.py \\\n  --normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n  --target_device_name=\"&lt;target_device_name, e.g. c2-standard-60&gt;\" \\\n  --output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n  --verbose \\\n  --cpu_uarch=\"&lt;host CPU uarch, e.g. 
CascadeLake&gt;\"\n# Traces can be collected by adding:\n# --traced_benchmark_tool_dir=\"${IREE_TRACED_BUILD_DIR?}/tools\" \\\n# --trace_capture_tool=/path/to/iree-tracy-capture \\\n# --capture_tarball=captured_tracy_files.tar.gz\n</code></pre> <p>Note that:</p> <ul> <li><code>&lt;target_device_name&gt;</code> selects a benchmark group targets a specific device:<ul> <li>Common options:<ul> <li><code>c2-standard-60</code> for x86_64 CPU benchmarks.</li> <li><code>a2-highgpu-1g</code> for NVIDIA GPU benchmarks.</li> </ul> </li> <li>All device names are defined under     build_tools/python/e2e_test_framework/device_specs.</li> </ul> </li> <li>To run x86_64 benchmarks, right now <code>--cpu_uarch</code> needs to be provided and     only <code>CascadeLake</code> is available currently.</li> <li>To build traced benchmark tools, see     Profiling with Tracy.</li> </ul> <p>Filters can be used to select the benchmarks:</p> <pre><code>build_tools/benchmarks/run_benchmarks_on_linux.py \\\n  --normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n  --target_device_name=\"c2-standard-60\" \\\n  --output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n  --verbose \\\n  --cpu_uarch=\"CascadeLake\" \\\n  --model_name_regex=\"MobileBert*\" \\\n  --driver_filter_regex='local-task' \\\n  --mode_regex=\"4-thread\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#generate-compilation-statistics-compilation-benchmarks","title":"Generate compilation statistics (compilation benchmarks)","text":"<p>Export the compilation benchmark config:</p> <pre><code>build_tools/benchmarks/export_benchmark_config.py compilation \\\n  --benchmark_presets=\"${COMPILATION_BENCHMARK_PRESETS?}\" \\\n  &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n</code></pre> <p>Generate the compilation statistics:</p> 
<pre><code>build_tools/benchmarks/collect_compilation_statistics.py \\\n  --compilation_benchmark_config=comp_config.json \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --build_log=\"${IREE_BUILD_DIR?}/.ninja_log\" \\\n  --output=\"${E2E_TEST_ARTIFACTS_DIR?}/compile_stats_results.json\"\n</code></pre> <p>Note that you need to use Ninja to build the benchmark suites as the tool collects information from its build log.</p>"},{"location":"developers/performance/benchmark-suites/#show-execution-compilation-benchmark-results","title":"Show execution / compilation benchmark results","text":"<p>If you want to generate a comparison report locally, you can use diff_local_benchmarks.py script to compare two result json files and generate the report. For example:</p> <pre><code>build_tools/benchmarks/diff_local_benchmarks.py \\\n  --base \"${E2E_TEST_ARTIFACTS_DIR?}/before_benchmark_results.json\" \\\n  --target \"${E2E_TEST_ARTIFACTS_DIR?}/after_benchmark_results.json\" \\\n  &gt; report.md\n</code></pre> <p>An example that compares compilation statistics:</p> <pre><code>build_tools/benchmarks/diff_local_benchmarks.py \\\n  --base-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/before_compile_stats_results.json\" \\\n  --target-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/after_compile_stats_results.json\" \\\n  &gt; report.md\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#find-compile-and-run-commands-to-reproduce-benchmarks","title":"Find compile and run commands to reproduce benchmarks","text":"<p>Each benchmark has its benchmark ID in the benchmark suites, you will see a benchmark ID at:</p> <ul> <li>In the serie's URL of https://perf.iree.dev<ul> <li>Execution benchmark: <code>https://perf.iree.dev/serie?IREE?&lt;benchmark_id&gt;</code></li> <li>Compilation benchmark:     <code>https://perf.iree.dev/serie?IREE?&lt;benchmark_id&gt;-&lt;metric_id&gt;</code></li> </ul> </li> <li>In <code>benchmark_results.json</code> and 
<code>compile_stats_results.json</code><ul> <li>Execution benchmark result has a field <code>run_config_id</code></li> <li>Compilation benchmark result has a field <code>gen_config_id</code></li> </ul> </li> <li>In PR benchmark summary or the markdown generated by     <code>diff_local_benchmarks.py</code>, each benchmark has the link to its     https://perf.iree.dev URL, which includes the benchmark ID.</li> </ul> <p>If you don't have artifacts locally, see Fetching Benchmark Artifacts from CI to find the GCS directory of the CI artifacts. Then fetch the needed files:</p> <pre><code># Get ${E2E_TEST_ARTIFACTS_DIR_URL} from \"Fetching Benchmark Artifacts from CI\".\nexport E2E_TEST_ARTIFACTS_DIR=\"e2e_test_artifacts\"\n\n# Download all artifacts\nmkdir \"${E2E_TEST_ARTIFACTS_DIR?}\"\ngcloud storage cp -r \"${E2E_TEST_ARTIFACTS_DIR_URL?}\" \"${E2E_TEST_ARTIFACTS_DIR?}\"\n</code></pre> <p>Run the helper tool to dump benchmark commands from benchmark configs:</p> <pre><code>build_tools/benchmarks/benchmark_helper.py dump-cmds \\\n  --execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/execution-benchmark-config.json\" \\\n  --compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/compilation-benchmark-config.json\" \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --benchmark_id=\"&lt;benchmark_id&gt;\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#get-full-list-of-benchmarks","title":"Get full list of benchmarks","text":"<p>The commands below output the full list of execution and compilation benchmarks, including the benchmark names and their flags:</p> <pre><code>build_tools/benchmarks/export_benchmark_config.py execution &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\nbuild_tools/benchmarks/export_benchmark_config.py compilation &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\nbuild_tools/benchmarks/benchmark_helper.py dump-cmds \\\n  
--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n  --compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#fetching-benchmark-artifacts-from-ci","title":"Fetching benchmark Artifacts from CI","text":""},{"location":"developers/performance/benchmark-suites/#1-find-the-corresponding-ci-workflow-run","title":"1. Find the corresponding CI workflow run","text":"<p>On the commit of the benchmark run, you can find the list of the workflow jobs by clicking the green check mark. Click any job that starts with <code>CI /</code>:</p> <p></p>"},{"location":"developers/performance/benchmark-suites/#2-get-urls-of-gcs-artifacts","title":"2. Get URLs of GCS artifacts","text":"<p>On the CI page, click <code>Summary</code> on the top-left to open the summary page. Scroll down and the links to artifacts are listed in a section titled \"Artifact Links\". Paste the content in your shell to define all needed variables for the following steps:</p> <p></p>"},{"location":"developers/performance/benchmark-suites/#3-fetch-the-benchmark-artifacts","title":"3. Fetch the benchmark artifacts","text":"<p>To fetch files from the GCS URL, the gcloud CLI tool (https://cloud.google.com/sdk/docs/install) can list the directory contents and download files (see https://cloud.google.com/sdk/gcloud/reference/storage for more usages). 
If you want to use CI artifacts to reproduce benchmarks locally, see Find Compile and Run Commands to Reproduce Benchmarks.</p> <p>Assume you get the GCS URL variables from Get URLs of GCS artifacts.</p> <p>Download artifacts:</p> <pre><code># The GCS directory has the same structure as your local ${IREE_BUILD_DIR?}/e2e_test_artifacts.\ngcloud storage ls \"${E2E_TEST_ARTIFACTS_DIR_URL?}\"\n\n# Download all source and imported MLIR files:\ngcloud storage cp \"${E2E_TEST_ARTIFACTS_DIR_URL?}/*.mlir\" \"&lt;target_dir&gt;\"\n</code></pre> <p>Execution and compilation benchmark configs can be downloaded at:</p> <pre><code># Execution benchmark config:\ngcloud storage cp \\\n  \"${E2E_TEST_ARTIFACTS_DIR_URL?}/execution-benchmark-config.json\" \\\n  \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n\n# Compilation benchmark config:\ngcloud storage cp \\\n  \"${E2E_TEST_ARTIFACTS_DIR_URL?}/compilation-benchmark-config.json\" \\\n  \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n</code></pre> <p>Benchmark raw results and traces can be downloaded at:</p> <pre><code># Execution benchmark raw results\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-results-*.json\" .\n\n# Optional: Merge raw results into a single file\nbuild_tools/benchmarks/benchmark_helper.py merge-results benchmark-results-*.json &gt; benchmark_results.json\n\n# Execution benchmark traces\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-traces-*.tar.gz\" .\n\n# Compilation benchmark results\ngcloud storage cp \"${COMPILATION_BENCHMARK_RESULTS_URL?}\" .\n</code></pre>"},{"location":"developers/performance/benchmarking/","title":"Benchmarking","text":"<p>IREE uses benchmarks to inspect performance at varying levels of granularity. Benchmarking is implemented using the Google Benchmark library. 
To understand performance details and guide optimization, please refer to the IREE profiling documentation.</p>"},{"location":"developers/performance/benchmarking/#module-benchmarks","title":"Module Benchmarks","text":"<p><code>iree-benchmark-module</code> is a program accepting (almost) the same inputs as <code>iree-run-module</code> that will benchmark the invocation of a single entry function. It measures timing for the whole process of invoking a function through the VM, including allocating and freeing output buffers. This is a high-level benchmark of an entire invocation flow. It provides a big picture view, but depends on many different variables, like an integration test. For finer-grained measurements more akin to unit tests, see Executable Benchmarks.</p> <p>To use <code>iree-benchmark-module</code>, generate an IREE module for the target backend:</p> <pre><code>$ bazel run //tools:iree-compile -- \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/samples/models/simple_abs.mlir \\\n  -o /tmp/module.fb\n</code></pre> <p>and then benchmark an exported function in that module:</p> <pre><code>$ bazel run //tools:iree-benchmark-module -- \\\n  --module=/tmp/module.fb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=f32=-2\n</code></pre> <p>You'll see output like</p> <pre><code>Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n  L1 Data 32K (x6)\n  L1 Instruction 32K (x6)\n  L2 Unified 1024K (x6)\n  L3 Unified 8448K (x1)\nLoad Average: 2.21, 1.93, 3.34\n***WARNING*** CPU scaling is enabled, the benchmark real time measurements may\n be noisy and will incur extra overhead.\n***WARNING*** Library was built as DEBUG. 
Timings may be affected.\n------------------------------------------------------------------------------\nBenchmark                                    Time             CPU   Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time       0.22 ms         0.23 ms         3356\n</code></pre> <p>Notice that there are a few warnings in there (you may not see all of these). The benchmark library helpfully warns about some common issues that will affect benchmark timing. When trying to obtain real benchmark numbers, you should generally build an optimized build (<code>-c opt</code> in Bazel) and disable CPU scaling.</p> <pre><code>bazel build -c opt //tools:iree-benchmark-module\n</code></pre> <p>Another thing to consider is that depending on where you are running the benchmark you might want to avoid additional programs running at the same time. Bazel itself runs a server even when it's not being actively invoked that can be quite a memory hog, so we'll instead invoke the binary directly. Use your favorite process manager (e.g. 
htop or pkill on Linux) to kill heavy-weight programs such as Chrome and Bazel.</p> <p>Now we'll actually invoke the binary:</p> <pre><code>$ ./bazel-bin/tools/iree-benchmark-module \\\n  --module=/tmp/module.fb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=f32=-2\n</code></pre> <pre><code>Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n  L1 Data 32K (x6)\n  L1 Instruction 32K (x6)\n  L2 Unified 1024K (x6)\n  L3 Unified 8448K (x1)\nLoad Average: 1.49, 3.42, 3.49\n------------------------------------------------------------------------------\nBenchmark                                    Time             CPU   Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time      0.011 ms        0.014 ms        61654\n</code></pre> <p>Remember to restore CPU scaling when you're done.</p>"},{"location":"developers/performance/benchmarking/#executable-benchmarks","title":"Executable Benchmarks","text":"<p>We also benchmark the performance of individual parts of the IREE system in isolation. IREE breaks a model down to dispatch functions. 
To benchmark all the dispatch functions, generate an IREE module with the <code>--iree-flow-export-benchmark-funcs</code> flag set:</p> <pre><code>$ build/tools/iree-compile \\\n  --iree-input-type=stablehlo \\\n  --iree-flow-export-benchmark-funcs \\\n  --iree-hal-target-backends=vmvx \\\n  tests/e2e/stablehlo_models/fullyconnected.mlir \\\n  -o /tmp/fullyconnected.vmfb\n</code></pre> <p>and then benchmark all exported dispatch functions (and all exported functions) in that module:</p> <pre><code>$ build/tools/iree-benchmark-module \\\n  --module=/tmp/fullyconnected.vmfb \\\n  --device=local-task\n</code></pre> <p>If no <code>--function</code> flag is specified, <code>iree-benchmark-module</code> will register a benchmark for each exported function that takes no inputs.</p> <p>You will see output like:</p> <pre><code>Run on (72 X 3700 MHz CPU s)\nCPU Caches:\n  L1 Data 32 KiB (x36)\n  L1 Instruction 32 KiB (x36)\n  L2 Unified 1024 KiB (x36)\n  L3 Unified 25344 KiB (x2)\nLoad Average: 4.39, 5.72, 6.76\n---------------------------------------------------------------------------------------------\nBenchmark                                                   Time             CPU   Iterations\n---------------------------------------------------------------------------------------------\nBM_main_ex_dispatch_0_benchmark/process_time/real_time  0.030 ms        0.037 ms        34065\nBM_main_ex_dispatch_1_benchmark/process_time/real_time  0.034 ms        0.042 ms        20567\nBM_main_ex_dispatch_2_benchmark/process_time/real_time  0.043 ms        0.051 ms        18576\nBM_main_ex_dispatch_3_benchmark/process_time/real_time  0.029 ms        0.036 ms        21345\nBM_main_ex_dispatch_4_benchmark/process_time/real_time  0.042 ms        0.051 ms        15880\nBM_main_ex_dispatch_5_benchmark/process_time/real_time  0.030 ms        0.037 ms        17854\nBM_main_ex_dispatch_6_benchmark/process_time/real_time  0.043 ms        0.052 ms        
14919\nBM_main_benchmark/process_time/real_time                0.099 ms        0.107 ms         5892\n</code></pre>"},{"location":"developers/performance/benchmarking/#bytecode-module-benchmarks","title":"Bytecode Module Benchmarks","text":"<p>Normally, the IREE VM is expected to be integrated into applications and to drive model execution, so its performance is of crucial importance. We strive to introduce as little overhead as possible and have several benchmark binaries dedicated to evaluating the VM's performance. These benchmark binaries are named <code>*_benchmark</code> in the <code>iree/vm/</code> directory. Like the benchmarks above, they use the Google Benchmark library.</p>"},{"location":"developers/performance/benchmarking/#cpu-configuration","title":"CPU Configuration","text":"<p>When benchmarking, it's important to consider the configuration of your CPUs. Most notably, CPU scaling can give variable results, so you'll usually want to disable it. This can get pretty complex, but the most basic thing to do is to run all CPUs at maximum frequency. The other thing to consider is what CPU(s) your program is running on. Both of these get more complicated on mobile and in multithreaded workloads.</p>"},{"location":"developers/performance/benchmarking/#linux","title":"Linux","text":"<p>Google benchmark provides some instructions. Note that the library will print \"CPU scaling is enabled\" warnings for any configuration that doesn't have the quota governor set to performance. Similarly the CPU frequency it reports is the maximum frequency of cpu0, not the frequency of the processor it's actually running on. 
This means that more advanced configurations should ignore these messages.</p> <p>Turn off CPU scaling before benchmarking.</p> <pre><code>sudo cpupower frequency-set --governor performance\n</code></pre> <p>Restore CPU scaling after benchmarking:</p> <pre><code>sudo cpupower frequency-set --governor powersave\n</code></pre> <p>To learn more about different quota governor settings, see https://www.kernel.org/doc/Documentation/cpu-freq/governors.txt. To restrict which CPUs you run on, use the <code>taskset</code> command which takes a hexadecimal mask.</p> <p>To only run on the lowest-numbered CPU you can run</p> <pre><code>taskset 1 sleep 20 &amp;\n</code></pre> <p>You can confirm that the process is running on the given CPU:</p> <pre><code>ps -o psr $!\n</code></pre> <p>Note that <code>$!</code> indicates the process ID of the last executed background command, so you can only use this shorthand if you didn't run any commands after the sleep. For more info on taskset, see https://linux.die.net/man/1/taskset.</p>"},{"location":"developers/performance/benchmarking/#android","title":"Android","text":"<p>Read and understand the Linux instructions first.</p> <p>Android doesn't give us quite as nice tooling, but the principle is basically the same. One important difference is that thermal throttling is a much bigger concern on mobile. Without a cooling plate, it is likely that high clock speeds will overheat the device and engage thermal throttling, which will ignore whatever clock speeds you may have set to prevent things from catching on fire. Therefore the naive approach above is likely not a good idea.</p> <p>You will likely need to be root (use <code>su</code> or <code>adb root</code>). The commands will depend on your exact phone and number of cores. First play around and make sure you understand what everything means. 
Note that each CPU has its own files which are used to control its behavior, but changes to a single CPU will sometimes affect others (see <code>/sys/devices/system/cpu/cpu0/cpufreq/affected_cpus</code>).</p> <p>Some useful files:</p> <pre><code>/proc/cpuinfo\n/sys/devices/system/cpu/possible\n/sys/devices/system/cpu/present\n/sys/devices/system/cpu/cpu0/online\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq\n/sys/devices/system/cpu/cpu0/cpufreq/affected_cpus\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed\n</code></pre> <p>See the clockspeed of each CPU:</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n    paste \\\n      \"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_cur_freq\" \\\n      \"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_min_freq\" \\\n      \"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_max_freq\"; \\\ndone\n</code></pre> <p>Before changing things, make sure to check the current scaling governor settings first so you can put them back when you're done.</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n    cat \"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n</code></pre>"},{"location":"developers/performance/benchmarking/#single-core-example","title":"Single-Core Example","text":"<p>Here's an example to run IREE in a single-threaded context on CPU 7 at its lowest clock speed.</p> <p>First we'll take control of the clockspeed by setting the governor to \"userspace\".</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n  echo userspace &gt; \\\n    
\"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n</code></pre> <p>We can now set individual clock speeds. We'll pin cpu7 to its minimum frequency. We choose the minimum instead of the maximum here to mitigate thermal throttling concerns.</p> <pre><code>$ cat /sys/devices/system/cpu/cpu7/cpufreq/cpuinfo_min_freq &gt; \\\n/sys/devices/system/cpu/cpu7/cpufreq/scaling_setspeed\n</code></pre> <p>We can confirm the frequencies of all the CPUs by running the same command as above. Now to run a command specifically on cpu7, use <code>taskset 80</code> (hexadecimal for binary 10000000):</p> <pre><code>taskset 80 sleep 20 &amp;\nps -o psr $!\n</code></pre> <p>Remember to clean up when you're done! Here we'll set the scaling governors back to schedutil because that's what they were before on the particular device this was tested on, but that governor may not exist on all devices.</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n  echo schedutil &gt; \\\n    \"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n</code></pre>"},{"location":"developers/performance/benchmarking/#android-scripts","title":"Android Scripts","text":"<p>We provide a few scripts to set clockspeeds on Android (under <code>build_tools/benchmarks</code>). 
These are somewhat device-specific:</p> <ul> <li>The <code>set_android_scaling_governor.sh</code> script works on all CPUs, but the default   governor name may be different across devices.</li> <li>The <code>set_*_gpu_scaling_policy.sh</code> script used should match the actual GPU on   your device.</li> </ul> <p>Sample configuration steps for Pixel 6:</p> <ol> <li>Copy all scripts to the device:</li> </ol> <pre><code>adb push build_tools/benchmarks/*.sh /data/local/tmp\n</code></pre> <ol> <li>Launch interactive adb shell as super user:</li> </ol> <pre><code>adb shell\noriole:/ # su\noriole:/ # cd /data/local/tmp\n</code></pre> <ol> <li>Pin frequencies (high clockspeeds):</li> </ol> <pre><code>oriole:/ # ./set_android_scaling_governor.sh\n CPU info (before changing governor):\n cpu     governor        cur     min     max\n ------------------------------------------------\n cpu0    sched_pixel     1098000 300000  1803000\n cpu1    sched_pixel     1598000 300000  1803000\n cpu2    sched_pixel     1598000 300000  1803000\n cpu3    sched_pixel     1098000 300000  1803000\n cpu4    sched_pixel     400000  400000  2253000\n cpu5    sched_pixel     400000  400000  2253000\n cpu6    sched_pixel     500000  500000  2802000\n cpu7    sched_pixel     500000  500000  2802000\n Setting CPU frequency governor to performance\n CPU info (after changing governor):\n cpu     governor        cur     min     max\n ------------------------------------------------\n cpu0    performance     1803000 300000  1803000\n cpu1    performance     1803000 300000  1803000\n cpu2    performance     1803000 300000  1803000\n cpu3    performance     1803000 300000  1803000\n cpu4    performance     2253000 400000  2253000\n cpu5    performance     2253000 400000  2253000\n cpu6    performance     2802000 500000  2802000\n cpu7    performance     2802000 500000  2802000\noriole:/data/local/tmp # ./set_pixel6_gpu_scaling_policy.sh\n GPU info (before changing frequency scaling policy):\n policy                 
                 cur     min     max\n --------------------------------------------------------------\n coarse_demand [adaptive] always_on      251000  151000  848000\n Setting GPU frequency scaling policy to performance\n GPU info (after changing frequency scaling policy):\n policy                                  cur     min     max\n --------------------------------------------------------------\n coarse_demand adaptive [always_on]      848000  151000  848000\n</code></pre> <ol> <li>Restore default frequencies:</li> </ol> <pre><code>oriole:/ # ./set_android_scaling_governor.sh sched_pixel\n...\noriole:/ # ./set_pixel6_gpu_scaling_policy.sh default\n...\n</code></pre> <p>TODO(scotttodd): Windows instructions</p>"},{"location":"developers/performance/profiling-cpu-events/","title":"Profiling CPUs","text":"<p>CPUs are able to record certain events that may be relevant when investigating the performance of a program. A common example of such an event is a \"cache miss\", when the program tries to access data in memory that isn't already in some CPU cache, causing that access to be slower than it could otherwise be.</p> <p>Querying and analyzing this data can be useful, but is hard in two distinct ways:</p> <ul> <li>Depending on the CPU and on the OS, both hardware and software limitations can   get in the way of obtaining accurate data.</li> <li>This data tends to be inherently difficult to interpret, even when it is   perfectly accurate. 
In practice it is often noisy and inaccurate, which makes   interpretation even more complicated.</li> </ul> <p>There are two parts to this page: platform-specific information about how to query this data, and, at the end, a platform-independent explanation of how to interpret it.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#perf-and-simpleperf-on-linux-and-android","title":"Perf and Simpleperf, on Linux and Android","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#overview","title":"Overview","text":"<p>The Linux kernel exposes system event counters to user-space programs by means of the <code>perf_event_open</code> system call. This includes both hardware event counters (such as CPU cache events) and software events from the kernel (such as page faults and context switches). Anyone may use this system call to implement a profiler, but Linux readily offers one, <code>perf</code>.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#preserving-artifacts","title":"Preserving artifacts","text":"<p>By default IREE cleans up any temporary files it creates while running. Tools like perf, however, require those files exist even after the process has exited. The environment variable <code>IREE_PRESERVE_DYLIB_TEMP_FILES</code> can be set to preserve the files. This is only needed for the CPU path when using the system loader.</p> <pre><code>export IREE_PRESERVE_DYLIB_TEMP_FILES=1\n</code></pre>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#desktop-linux","title":"Desktop linux","text":"<p>On desktop Linux we can use <code>perf</code>. It is provided on most Linux distributions, for instance on Debian-based distributions do:</p> <pre><code>sudo apt install linux-perf\n</code></pre> <p>Run the program to be profiled, prepending its command line with <code>perf record</code>. 
By default this will write the profile data to the current directory, <code>./perf.data</code>. Sometimes this isn't ideal, such as when the current directory is under version control. Explicit paths can be specified with the <code>-o</code> flag to direct the output of <code>perf record</code>, and then with the <code>-i</code> flag to select the input of subsequent commands analyzing the profile. Example:</p> <pre><code>perf record -o /tmp/perf.data \\\n  ./tools/iree-benchmark-module \\\n    --device=local-task \\\n    ... command-line arguments of iree-benchmark-module as usual ...\n</code></pre> <p>By default, this samples time spent. One may specify instead an event to sample by, with the <code>-e</code> flag. For instance, to sample by L1 cache misses, one may do:</p> <pre><code>perf record -o /tmp/perf.data -e L1-dcache-load-misses \\\n  ./tools/iree-benchmark-module \\\n    --device=local-task \\\n    ... command-line arguments of iree-benchmark-module as usual ...\n</code></pre> <p><code>perf list</code> dumps the list of event types.</p> <p>Once you have recorded a profile, there are two main ways to analyze it: <code>perf report</code> and <code>perf annotate</code>.</p> <p><code>perf report</code> breaks down the event counts by symbol. In the default case where what was sampled was time, this is just an ordinary profile by symbol name, no different than what could be viewed in other profilers such as Tracy. Where it gets really interesting is when the profile was recording a specific event type, as in the above <code>-e L1-dcache-load-misses</code> example:</p> <pre><code>perf report -i /tmp/perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', Event count (approx.): 362571861\nOverhead  Command          Shared Object              Symbol\n  61.53%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_31\n  13.30%  cpu0             dylib_executablenzpx2Q.so  [.] 
serving_default_ex_dispatch_11\n   2.11%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_13\n   1.90%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_19\n   1.54%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_25\n   1.49%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_5\n</code></pre> <p><code>perf annotate</code> breaks down the event counts by instruction. Again, in the default case where what was sampled was time, this is no different than what could be viewed in Tracy, and the real motivation to use <code>perf</code> is when profiling by specific event types as in the above <code>-e L1-dcache-load-misses</code> example:</p> <pre><code>perf annotate -i /tmp/perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', 4000 Hz, Event count (approx.): 362571861\nserving_default_ex_dispatch_31  /tmp/dylib_executablenzpx2Q.so [Percent: local period]\n  1.66 \u2502        movups -0x1000(%rdi),%xmm10\n  0.48 \u2502        movups -0x800(%rdi),%xmm9\n  0.82 \u2502        movups (%rdi),%xmm8\n  0.49 \u2502        movaps %xmm1,%xmm4\n  0.12 \u2502        shufps $0x0,%xmm1,%xmm4\n  0.14 \u2502        mulps  %xmm5,%xmm4\n  0.28 \u2502        addps  %xmm6,%xmm4\n  0.60 \u2502        movaps %xmm3,%xmm6\n  0.34 \u2502        shufps $0x0,%xmm3,%xmm6\n</code></pre>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#warning","title":"Warning","text":"<p><code>perf annotate</code> is even noisier than <code>perf report</code> as it can be overly optimistic, depending on the CPU, to pin an event to a specific instruction. Typically, this works fairly well on x86 CPUs and less well on ARM CPUs and more generally on anything mobile. 
Even on a desktop x86 CPU, this is noisy, as the above example (recorded on a Skylake workstation) shows: it blamed a <code>mulps %xmm5,%xmm4</code> instruction for a cache miss, which doesn't make sense as that instruction only touches registers.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#android","title":"Android","text":"<p>On Android we can use <code>simpleperf</code>. It's preinstalled on current Android <code>userdebug</code> images, and part of the Android NDK.</p> <p>In theory, as Android is Linux, it should be possible to use <code>perf</code>. Unfortunately, <code>perf</code> is difficult to build for Android. Fortunately, <code>simpleperf</code> is readily available: it is preinstalled in Android <code>userdebug</code> images, and it is part of the Android NDK.</p> <p>First, we record on the device:</p> <pre><code>adb shell \\\n  simpleperf record -e raw-l1d-cache-refill -o /data/local/tmp/perf.data \\\n    /data/local/tmp/iree-benchmark-module \\\n      --device=local-task \\\n      ... command-line arguments of iree-benchmark-module as usual ...\n</code></pre> <p>Then pull the recorded data from the device, and analyze on the desktop. We assume that <code>${ANDROID_NDK}</code> points to the local copy of the Android NDK.</p> <pre><code>adb pull /data/local/tmp/perf.data /tmp/perf.data\n${ANDROID_NDK}/simpleperf/report.py -i /tmp/perf.data\n</code></pre> <p>This prints a breakdown of <code>raw-l1d-cache-refill</code> events by symbol.</p> <p>Like with <code>perf</code>, a list of event types can be queried by the <code>list</code> subcommand:</p> <pre><code>adb shell simpleperf list\n</code></pre>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#no-support-for-annotate-by-cpu-event","title":"No support for <code>annotate</code> by CPU event","text":"<p>There is no <code>simpleperf annotate</code>. 
The <code>simpleperf</code> documentation lists a couple of ways of achieving the same thing.</p> <p>However:</p> <ul> <li>The common case of annotating by time, as opposed to annotating by CPU event,   is supported by Tracy.</li> <li>Annotating by CPU event is inherently not working due to hardware limitations   of the ARM CPUs found in Android devices. That is, the hardware is too   imprecise at pinning an event to a particular instruction.</li> </ul>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#interpreting-cpu-event-counts","title":"Interpreting CPU event counts","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#problems","title":"Problems","text":"<p>There are multiple layers of complexity in interpreting CPU event counts.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-in-themselves-normal","title":"These events are in themselves normal","text":"<p>The first difficulty is in the fact that most of these events are normal. So just knowing that they happened is not in itself actionable.</p> <p>For example, if we learn that some code causes cache misses, that isn't big news: so does all code. Maybe this code has too many cache misses, but how many is too many? Maybe this code alone accounts for a large fraction of the overall total of the whole program, but maybe even that is normal, for instance if the code being studied is the 'hot' part of the program where a large fraction of overall time is spent?</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-hardware-dependent-and-under-documented","title":"These events are hardware-dependent and under-documented","text":"<p>Many of these events have a meaning that varies between CPUs and that is difficult to characterize on any CPU, let alone in a way that applies to all CPUs.</p> <p>For example, take the \"L2 data cache refill\". 
On ARM, with <code>simpleperf</code>, that would be <code>raw-l2d-cache-refill</code>. Questions:</p> <ul> <li>Is \u201cL2\u201d inclusive of   \u201cL1\u201d?</li> <li>How many bytes are transferred per \u201crefill\u201d?</li> <li>Are accesses induced by speculative execution or by automatic pre-fetching   counted in the same way as accesses induced by actual code execution?</li> </ul> <p>The answers to all of the above questions are CPU-dependent. They may even vary between the CPU cores of the same Android device.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-imprecise-and-noisy-particularly-on-arm-cpus","title":"These events are imprecise and noisy, particularly on ARM CPUs","text":"<p>Expect noise levels above 10% in many CPU event counts on ARM CPUs. Moreover, on ARM, as discussed above, there is inaccuracy in which instruction is blamed for which event, which will increase inaccuracy of per-symbol breakdowns for very cheap symbols (and makes <code>perf annotate</code> impossible as noted above). 
Finally, be aware that some ARM CPUs may perform event count interpolation, so we may not have any access to true hardware counts.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#recommendations","title":"Recommendations","text":"<p>Here is a workflow pattern that allows making significant use of CPU event counts, despite all the problems noted above:</p> <ul> <li>Hypothesize that some code diff might help performance, and might help   reduce the number of CPU events of a certain type, and that the two might be   related.</li> <li>Benchmark with and without the code diff, on the same device, everything else   being equal.<ul> <li>Let your benchmark perform a fixed number of iterations, or, if using a benchmark termination condition of the form \"run until at least N seconds have elapsed\", carefully divide event counts by the actual number of iterations that were run.</li> </ul> </li> <li>If the observed CPU event count difference is significant, go ahead and claim   that your code diff probably helps with that aspect of CPU behavior.</li> </ul> <p>Some things NOT to be done:</p> <ul> <li>Don\u2019t try to compare different metrics, not even when it seems obvious that   they should satisfy a simple relationship, not even on the same CPU (e.g. \u201cL1   accesses should be greater than L2 accesses\u201d).</li> <li>Don\u2019t divide by some \u201ctotal\u201d metric to get some kinds of ratios. For example,   don\u2019t try to compute a \u201ccache miss ratio\u201d as quotient of \u201ccache refill\u201d over   \u201call cache accesses\u201d metrics. The first problem with that (even before we get   to CPU-specific issues) is that that\u2019s rewarding increases to the \u201call cache   accesses\u201d metrics, so if something bad happens in your codegen and your kernel   ends up spilling a lot of registers to the stack, that\u2019s going to be a lot more   accesses which will all be L1 hits so that\u2019ll help this ratio look better!  
So   more generally, just try to minimize some CPU metrics (that count \u201ccostly\u201d   events), not some more complex math expression formed from arithmetic on CPU   metrics.</li> </ul>","tags":["CPU"]},{"location":"developers/performance/profiling-gpu-vulkan/","title":"Profiling GPUs using Vulkan","text":"<p>Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used.</p> <p>(TODO: add some pictures for each tool)</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#renderdoc","title":"RenderDoc","text":"<p>Support for RenderDoc can be enabled by configuring cmake with <code>-DIREE_ENABLE_RENDERDOC_PROFILING=ON</code>. When built in to IREE the profiling functionality is available for programmatic use via the <code>iree_hal_device_profiling_begin</code> and <code>iree_hal_device_profiling_end</code> APIs.</p> <p>When using one of the standard IREE tools (<code>iree-run-module</code>, <code>iree-benchmark-module</code>, etc) the <code>--device_profiling_mode=queue</code> flag can be passed to enable capture around the entire invocation (be careful when benchmarking as the recordings can be quite large!). 
The default capture file name can be specified with <code>--device_profiling_file=foo.rdc</code>.</p> <p>Capturing in the RenderDoc UI can be done by specifying the IREE tool or embedding application (<code>iree-run-module</code>, etc) as the launch executable and adding all arguments as normal.</p> <p>Capturing from the command line can be done using <code>renderdoccmd</code> with the specified file appearing (by default) in the executable directory:</p> <pre><code>renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=foo.rdc ...\nstat tools/foo.rdc\nrenderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=/some/path/foo.rdc ...\nstat /some/path/foo.rdc\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#android-gpus","title":"Android GPUs","text":"<p>There are multiple GPU vendors for the Android platforms, each offering their own tools. Android GPU Inspector (AGI) provides a cross-vendor solution. See the documentation for more details.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#desktop-gpus","title":"Desktop GPUs","text":"<p>Vulkan supports both graphics and compute, but most tools in the Vulkan ecosystem focus on graphics. As a result, some Vulkan profiling tools expect commands to correspond to a sequence of frames presented to displays via framebuffers. This means additional steps for IREE and other Vulkan applications that solely rely on headless compute. For graphics-focused tools, we need to wrap IREE's logic inside a dummy rendering loop in order to provide the necessary markers for these tools to perform capture and analysis.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#amd","title":"AMD","text":"<p>For AMD GPUs, Radeon GPU Profiler (RGP) is the tool to understand fine details of how IREE GPU performs. 
See the documentation for details.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#nvidia","title":"NVIDIA","text":"<p>For NVIDIA GPUs, NVIDIA Nsight Graphics is the tool to understand fine details of how IREE GPU performs. See the documentation for details.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-with-tracy/","title":"Profiling with Tracy","text":""},{"location":"developers/performance/profiling-with-tracy/#overview","title":"Overview","text":"<p>Tracy is a hybrid instrumentation and sampling profiler that IREE uses for performance analysis.</p> <p></p>"},{"location":"developers/performance/profiling-with-tracy/#instrumentation-and-sampling","title":"Instrumentation and sampling","text":"<ul> <li> <p>Instrumentation is generic code built into the program being profiled,     recording zone start and end timestamps where a developer requests them:</p> <p></p> <p>Most of IREE's runtime code is instrumented using the macros defined in iree/base/tracing.h:</p> <pre><code>void iree_sample_function() {\n  IREE_TRACE_ZONE_BEGIN(z0);\n  // All code here will be included in the zone for `iree_sample_function`.\n  IREE_TRACE_ZONE_END(z0);\n}\n</code></pre> </li> <li> <p>Sampling collects program state and information about the machine using     platform-specific APIs at a regular sampling frequency. Sampled data     includes callstacks, hardware counters, and more:</p> <p></p> <p>While recording instrumentation data requires no special setup, recording sampling data will need some configuration depending on your operating system. Refer to the \"Automated data collection\" section in the Tracy PDF manual for full details. 
Generally, sampling needs:</p> <ul> <li>Debug information from <code>-DCMAKE_BUILD_TYPE=RelWithDebInfo</code> or <code>Debug</code></li> <li>Privilege elevation from <code>sudo</code> on Unix or administrator on Windows</li> </ul> </li> </ul>"},{"location":"developers/performance/profiling-with-tracy/#remote-or-embedded-telemetry","title":"Remote or embedded telemetry","text":"<p>Tracy uses a client-server model with communication over a TCP socket:</p> <ul> <li>The \"client\" is the program being profiled.</li> <li>The \"server\" is either the Tracy profiler UI or the Tracy command-line   capture tool.</li> </ul> <pre><code>graph LR\n  tracyclient[\"Tracy Client\n  e.g. iree-run-module\"]\n  tracyserver[\"Tracy Server\"]\n  network([\"Network\"])\n\n  thread1[\"Thread 1\"] --&gt; tracyclient\n  thread2[\"Thread 2\"] --&gt; tracyclient\n  thread3[\"Thread 3\"] --&gt; tracyclient\n\n  tracyclient --&gt; network\n  network --&gt; tracyserver\n\n  tracyserver --&gt; display[\"Display\"]\n  tracyserver --&gt; storage[\"Storage\"]</code></pre> <p>This allows for remote capture, such as over SSH, as well as sharing of saved traces across machines.</p>"},{"location":"developers/performance/profiling-with-tracy/#the-tracy-manual","title":"The Tracy manual","text":"<p>The primary source of Tracy documentation, including how to build the profiler UI and CLI capture tool, is a PDF manual:</p> <p>Download tracy.pdf  View tracy.pdf in browser </p>"},{"location":"developers/performance/profiling-with-tracy/#capturing-a-trace","title":"Capturing a trace","text":"<p>You will need three things to capture a trace:</p> <ol> <li>The Tracy profiler UI (or CLI capture tool)</li> <li>A binary tool to trace, such as <code>iree-run-module</code>, built with tracing     support enabled</li> <li>A program to profile, e.g. 
a <code>.vmfb</code> file with parameters and input values</li> </ol> <p>The Tracy tools can either be downloaded from the official releases or they can be built from source by using either the upstream CMake build or IREE's downstream CMake build.</p>"},{"location":"developers/performance/profiling-with-tracy/#quickstart","title":"Quickstart","text":"<ol> <li> <p>Build <code>iree-run-module</code> (or other tools like <code>iree-benchmark-module</code>) with     tracing support:</p> <pre><code># Sampling needs debug info from the `RelWithDebInfo` or `Debug` build type.\n\ncmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_RUNTIME_TRACING=ON\ncmake --build ../iree-build/ --target iree-run-module\n</code></pre> <p>For more information about building from source, follow the Getting started page.</p> Tip - Instrumented Python packages <p>The <code>iree-runtime</code> Python package includes prebuilt instrumented tools. Set the <code>IREE_PY_RUNTIME=tracy</code> environment variable to use them:</p> <pre><code>python -m pip install iree-runtime\nIREE_PY_RUNTIME=tracy iree-run-module ...\n</code></pre> <p>You should see the following message printed to stderr:</p> <p><code>-- Using Tracy runtime (IREE_PY_RUNTIME=tracy)</code></p> <p>See this section in the Python bindings documentation for more details.</p> </li> <li> <p>Compile a program to profile:</p> <pre><code># The --iree-hal-executable-debug-level=3 flag embeds source information\n# about each executable into the .vmfb file for the runtime to pass to\n# Tracy. 
Without this flag, source locations are included on a best-effort\n# basis, typically coming from the input .mlir or .py file.\n\niree-compile program_input.mlir \\\n  --iree-hal-target-backends={target} \\\n  --iree-hal-executable-debug-level=3 \\\n  -o program.vmfb\n</code></pre> </li> <li> <p>Run the program using the instrumented <code>iree-run-module</code>:</p> <pre><code># Set the TRACY_NO_EXIT environment variable to keep short-running programs\n# from exiting before connecting.\n#\n# Some platforms need elevated permissions (root / sudo / administrator)\n# to collect sampling data using kernel facilities. If you only want to\n# collect instrumentation data or your platform does not require it, you\n# can run with more limited permissions.\n\nTRACY_NO_EXIT=1 sudo iree-run-module \\\n  --module=program.vmfb \\\n  --device={device} \\\n  --entry_function={entry} \\\n  --parameters={parameters} \\\n  --input={arg0} \\\n  --input={arg1} \\\n  ...\n</code></pre> </li> <li> <p>While the program is running, connect using the Tracy profiler UI or capture     tool:</p> Tracy profiler UITracy capture tool <p>The profiler UI lists available clients or can be set to connect to the next instrumented process:</p> <p></p> <p>The capture tool can be used programmatically and over SSH:</p> <pre><code>$ capture -o /tmp/capture.tracy\n\nConnecting to 127.0.0.1:8086...\n</code></pre> </li> <li> <p>View the captured trace once it finishes collecting events. 
Traces captured     by the profiler UI can also be saved to <code>.tracy</code> files for sharing and     archival.</p> </li> </ol>"},{"location":"developers/performance/profiling-with-tracy/#including-more-information-in-traces","title":"Including more information in traces","text":""},{"location":"developers/performance/profiling-with-tracy/#changing-iree_tracing_mode","title":"Changing <code>IREE_TRACING_MODE</code>","text":"<p>Set IREE's <code>IREE_TRACING_MODE</code> value (defined in iree/base/tracing.h) to adjust which tracing features are enabled. Each feature adds tracing overhead and increases the size of trace files, so adjust this setting with care.</p> <p>For example, to track memory allocations with callstacks:</p> <pre><code>cmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_RUNTIME_TRACING=ON \\\n    -DIREE_TRACING_MODE=4\ncmake --build ../iree-build/ --target iree-run-module\n</code></pre> <p>The Memory window in the Tracy profiler should then show callstacks for each allocation:</p> <p></p>"},{"location":"developers/performance/profiling-with-tracy/#options-for-the-llvm-cpu-backend","title":"Options for the <code>llvm-cpu</code> backend","text":"<p>When using the <code>llvm-cpu</code> backend (<code>--iree-hal-target-backends=llvm-cpu</code> with <code>--device=local-task</code> or <code>--device=local-sync</code>), these options are available:</p> <ul> <li> <p>The <code>--iree-llvmcpu-link-embedded=false</code> flag uses the \"system\" linker     (.so/.dylib/.dll) instead of the generic     \"embedded\" ELF linker, allowing Tracy to look more deeply at generated code:</p> <p></p> </li> <li> <p>The <code>IREE_PRESERVE_DYLIB_TEMP_FILES</code> environment variable can be used on     Posix platforms to ensure that Tracy can view IREE's generated native code.</p> </li> <li> <p>Ensure that <code>--iree-llvmcpu-debug-symbols=true</code> is set (it is by default).</p> </li> </ul> <p>Putting those flags 
and environment variables together in an example:</p> <pre><code>iree-compile program_input.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-hal-executable-debug-level=3 \\\n  --iree-llvmcpu-link-embedded=false \\\n  --iree-llvmcpu-debug-symbols=true \\\n  -o program_full_info.vmfb\n\nTRACY_NO_EXIT=1 IREE_PRESERVE_DYLIB_TEMP_FILES=1 sudo iree-run-module \\\n  --device=local-task \\\n  --module=program_full_info.vmfb \\\n  ...\n</code></pre>"},{"location":"developers/performance/profiling-with-tracy/#remote-capture-eg-ssh-android","title":"Remote capture (e.g. SSH, Android)","text":"<p>Tracy's client/server connection uses TCP port 8086 by default. If the Tracy-instrumented program is running on a separate machine, this port needs to be forwarded.</p> <p>In particular, when profiling on Android, this is needed:</p> <pre><code>adb forward tcp:8086 tcp:8086\n</code></pre> <p>You can also pass <code>-p &lt;port&gt;</code> to the capture tool to override the default port to connect to, or use the Tracy GUI which scans other ports too.</p>"},{"location":"developers/performance/profiling-with-tracy/#touring-the-tracy-profiler-ui","title":"Touring the Tracy profiler UI","text":"<p>The initial view should look like this:</p> <p></p> <p>Before going further, take a second to check that your recorded profile data has all the data that it should have. Permissions issues could cause it to lack \"sampling\" or \"CPU data\" information. For example, here is what the initial view looks like when one forgot to run the profiled program as root on Desktop Linux (where running as root is required):</p> <p></p> <p>Notice how the latter screenshot is lacking the following elements:</p> <ul> <li>No 'CPU data' header in the top left, with the list of all CPU cores.</li> <li>No 'ghost' icon next to the 'Main thread' header.</li> </ul> <p>Click the 'Statistics' button at the top. 
It will open a window like this:</p> <p></p> <p>See how the above screenshot has two radio buttons at the top: 'Instrumentation' and 'Sampling'. At this point, if you don't see the 'Sampling' radio button, you need to resolve that first, as discussed above about possible permissions issues.</p> <p>These 'Instrumentation' and 'Sampling' statistics correspond to the two kinds of data that Tracy collects about your program. In the Tracy main view, they correspond, respectively, to 'instrumentation' and 'ghost' zones. Refer to the Tracy PDF manual for a general introduction to these concepts. For each thread, the ghost icon toggles the view between these two kinds of zones.</p> <p>Back to the main view, look for the part of the timeline that is of interest to you. Your area of interest might not be on the Main thread. In fact, it might be on a thread that's not visible in the initial view at all. To pan around with the mouse, hold the right mouse button down (or its keyboard equivalent on macOS). Alternatively, look for the 'Frame' control at the top of the Tracy window. Use the 'next frame' arrow button until more interesting threads appear.</p> <p>IREE module code tends to run on a thread whose name contains the word <code>worker</code>.</p> <p>Once you have identified the thread of interest, you typically want to click its ghost icon to view its \"ghost\" (i.e. sampling) zones. Here is what you should get when clicking on a ghost zone:</p> <p></p> <p>The percentages column to the left of the disassembly shows where time is being spent. This is unique to the sampling data (ghost zones) and has no equivalent in the instrumentation data (instrumentation zones). 
Here is what we get clicking on the corresponding instrumentation zone:</p> <p></p> <p>This still has a 'Source' button but that only shows the last C++ caller that had explicit Tracy information, so here we see a file under <code>iree/hal</code> whereas the Ghost zone saw into the IREE compiled module that that calls into, with the source view pointing to the <code>.mlir</code> file.</p>"},{"location":"developers/performance/profiling-with-tracy/#tracing-iree-compile","title":"Tracing <code>iree-compile</code>","text":"<p>Tracing <code>iree-compile</code> is much like tracing the runtime tools, except that both of these options need to be set with CMake: <code>-DIREE_ENABLE_RUNTIME_TRACING=ON -DIREE_ENABLE_COMPILER_TRACING=ON</code>:</p> <pre><code>cmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_RUNTIME_TRACING=ON \\\n    -DIREE_ENABLE_COMPILER_TRACING=ON\ncmake --build ../iree-build/ --target iree-compile\n</code></pre> <p>The steps for collecting traces are the same: run the instrumented program and connect using the Tracy profiler UI or capture tool.</p> <p></p> <ul> <li>MLIR passes are instrumented using   Pass Instrumentation,   (see   <code>TracingUtils.h</code>)</li> <li>Zones are annotated with op breadcrumbs indicating which root op was processed</li> <li>Each compilation phase (e.g. Flow, Stream, HAL) is tagged as a \"frame\", so   you can jump between them, limit statistics to them, and see how much time   each took</li> </ul> Caution - Tracy sampling with <code>iree-compile</code> <p>When tracing the compiler, the LLVM/MLIR code can easily generate millions of trace events. 
Traces captured with sampling can thus take hours to collect, require 40GB+ of RAM to view, and take 1GB+ on disk to store.</p> <p></p> <p>However, sampling is especially useful in diagnosing long compile times, since only the MLIR passes are instrumented, unlike in IREE's runtime where most functions are covered.</p> <p>For more tips on profiling the compiler, see the Compile time regression debugging page.</p>"},{"location":"developers/performance/profiling-with-tracy/#troubleshooting","title":"Troubleshooting","text":""},{"location":"developers/performance/profiling-with-tracy/#resource_exhausted-failed-to-open-file-issue","title":"\"RESOURCE_EXHAUSTED; failed to open file\" issue","text":"<p>This is a known issue with how Tracy operates. One way to work around it is to manually increase the total number of files that can be kept opened simultaneously and run the command with that setting:</p> <pre><code>sudo sh -c \"ulimit -n &lt;bigNum&gt; &amp;&amp; &lt;myTracyInstrumentedProgram&gt;\"\n</code></pre> <p>Info</p> <p>Tracy keeps a number of file descriptors open that, depending on the machine and its settings, may exceed the limit allowed by the system resulting in IREE failing to open more files. In particular, it is common to have a relatively low limit when running with <code>sudo</code>.</p>"},{"location":"developers/performance/profiling-with-tracy/#appendix","title":"Appendix","text":""},{"location":"developers/performance/profiling-with-tracy/#building-tracy-from-source","title":"Building Tracy from source","text":""},{"location":"developers/performance/profiling-with-tracy/#install-dependencies","title":"Install dependencies","text":""},{"location":"developers/performance/profiling-with-tracy/#do-you-need-capstone-next","title":"Do you need capstone-next?","text":"<p>You can skip this section if you don't need disassembly of CPU code.</p> <p>Capstone is the disassembly framework used by Tracy. 
The default branch, which is what OS packages still distribute, is running a few years behind current CPU architectures.</p> <p>Newer CPU architectures such as RISC-V, or newer extensions of existing architectures (e.g. new SIMD instructions in the ARM architecture) are typically only supported in the <code>next</code> branch. If you need that support, check out and build that branch. Consider uninstalling any OS package for <code>capstone</code> or otherwise ensure that your IREE build will pick up your <code>next</code> branch build.</p>"},{"location":"developers/performance/profiling-with-tracy/#linux","title":"Linux","text":"<p>If you haven't opted to build <code>capstone-next</code> (see above section), install the OS package for <code>capstone</code> now (Debian-based distributions):</p> <pre><code>sudo apt install libcapstone-dev\n</code></pre> <p>Install other dependencies:</p> <pre><code>sudo apt install libtbb-dev libzstd-dev libglfw3-dev libfreetype6-dev libgtk-3-dev\n</code></pre> <p>If you only build the command-line tool <code>iree-tracy-capture</code> and not the graphical <code>iree-tracy-profiler</code>, you can install only:</p> <pre><code>sudo apt install libtbb-dev libzstd-dev\n</code></pre> <p>The zstd version on Ubuntu 18.04 is old. You will need to install it from source from https://github.com/facebook/zstd.git</p>"},{"location":"developers/performance/profiling-with-tracy/#mac","title":"Mac","text":"<p>If you haven't opted to build <code>capstone-next</code> (see above section), install the system <code>capstone</code> now:</p> <pre><code>brew install capstone\n</code></pre> <p>Install other dependencies:</p> <pre><code>brew install pkg-config glfw freetype tbb zstd\n</code></pre>"},{"location":"developers/performance/profiling-with-tracy/#build-the-tracy-tools","title":"Build the Tracy tools","text":"<p>A CMake-based build system for Tracy is maintained as part of IREE. 
In your IREE host build directory, set the following CMake option:</p> <pre><code>cmake -DIREE_BUILD_TRACY=ON -DIREE_ENABLE_LLD=ON .\n</code></pre> <p>That enables building the Tracy server tools, <code>iree-tracy-profiler</code> and <code>iree-tracy-capture</code>, introduced above. It also enables building the tool <code>iree-tracy-csvexport</code> which can be used to export a captured trace as a CSV file (see Section 6 \"Exporting zone statistics to CSV\" in the Tracy manual).</p> <p>TODO - switch to using upstream CMake project</p> <p>Tracy now has an upstream CMake build for each of its components. We may be able to use this directly.</p> <p>If profiling on Android/ARM, you might need the patch discussed in the next paragraph.</p> <p>Consider building without assertions (<code>cmake -DIREE_ENABLE_ASSERTIONS=OFF</code>). At least <code>iree-tracy-profiler</code> has some faulty assertions that can cause the profiler UI to crash during normal usage.</p> <p>Rebuild, either everything or just these specific targets:</p> <pre><code>cmake --build . --target iree-tracy-profiler iree-tracy-capture iree-tracy-csvexport\n</code></pre> <p>This should have created the <code>iree-tracy-profiler</code>, <code>iree-tracy-capture</code>, and <code>iree-tracy-csvexport</code> binaries:</p> <pre><code>$ find . -name iree-tracy-*\n./tracy/iree-tracy-profiler\n./tracy/iree-tracy-capture\n./tracy/iree-tracy-csvexport\n</code></pre>"},{"location":"developers/performance/profiling-with-tracy/#android-system-settings-required-for-sampling-and-systrace","title":"Android system settings required for Sampling and SysTrace","text":"<p>When profiling on an Android device, in order to get the most useful information in the trace, tweak system permissions as follows before profiling. 
This needs to be done again after every reboot of the Android device.</p> <p>From your desktop, get a shell on the Android device:</p> <pre><code>adb shell\n</code></pre> <p>The following commands are meant to be run from that Android device shell. First, get root access:</p> <pre><code>su\n</code></pre> <p>Now run the following commands as root on the Android device:</p> <pre><code>setenforce 0\nmount -o remount,hidepid=0 /proc\necho 0 &gt; /proc/sys/kernel/perf_event_paranoid\necho 0 &gt; /proc/sys/kernel/kptr_restrict\n</code></pre> <p>Note: in order for this to work, the device needs to be rooted, which means that the above <code>su</code> command must succeed. This is sometimes confused with the <code>adb root</code> command, but that's not the same. <code>adb root</code> restarts the <code>adbd</code> daemon as root, which causes device shells to be root shells by default. This is unnecessary here and we don't recommend it: real Android applications never run as root, so Tracy/Android has to support running benchmarks as regular user and it's best to stick to this for the sake of realistic benchmarks. Internally, Tracy executes <code>su</code> commands to perform certain actions, so it too relies on the device being rooted without relying on the benchmark process being run as root.</p>"},{"location":"developers/performance/profiling/","title":"Profiling overview","text":"<p>IREE benchmarking gives us an accurate and reproducible view of program performance at specific levels of granularity. To analyze system behavior in more depth, there are various ways to profile IREE.</p>"},{"location":"developers/performance/profiling/#cpu-cache-and-other-cpu-event-profiling","title":"CPU cache and other CPU event profiling","text":"<p>For some advanced CPU profiling needs such as querying CPU cache and other events, one may need to use some OS-specific profilers. 
See Profiling CPUs.</p>"},{"location":"developers/performance/profiling/#vulkan-gpu-profiling","title":"Vulkan GPU Profiling","text":"<p>Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used. Refer to Profiling GPUs using Vulkan.</p>"},{"location":"developers/performance/profiling/#tracy","title":"Tracy","text":"<p>Tracy is a profiler that's been used for a wide range of profiling tasks on IREE. Refer to Profiling with Tracy.</p>"},{"location":"guides/","title":"Guides","text":""},{"location":"guides/#ml-frameworks","title":"ML frameworks","text":"<p>Start here: ML frameworks overview</p> <p>Guides for specific frameworks:</p> <ul> <li> TensorFlow and    TensorFlow Lite</li> <li> JAX</li> <li> PyTorch</li> </ul>"},{"location":"guides/#deployment-configurations","title":"Deployment configurations","text":"<p>Start here: Deployment configurations overview</p> <p>Guides for specific configurations:</p> <ul> <li> CPU for general   purpose CPU deployment</li> <li> CPU - Bare-Metal   with minimal platform dependencies</li> <li> GPU - Vulkan   for cross-platform usage and interop with graphics applications</li> <li> GPU - CUDA   for NVIDIA-specific solutions</li> <li> GPU - ROCm   for AMD-specific solutions</li> <li> GPU - Metal   for running on Apple hardware</li> </ul>"},{"location":"guides/#general-topics","title":"General topics","text":"<ul> <li> Parameters for managing   large chunks of program data</li> </ul>"},{"location":"guides/parameters/","title":"Parameters","text":""},{"location":"guides/parameters/#overview","title":"Overview","text":"<p>Parameters in IREE are externalized storage for resources that are asynchronously accessible and device-aware. 
Parameters offer efficient ways to store, manipulate, and load data for large resources like the weights in a machine learning model.</p> <p>Without using parameters, compiled programs include both code and data:</p> <pre><code>graph LR\n  accTitle: .vmfb file without using parameters\n  accDescr {\n    Without using parameters, .vmfb files contain host code, device code,\n    small data, and large resources all in the same file.\n  }\n\n  subgraph VMFB[\".vmfb file\"]\n    HostCode(Host code)\n    DeviceCode(Device code)\n    SmallData(Small data)\n    LargeResources(Large resources)\n  end</code></pre> <p>Using parameters, data can be stored, transmitted, and loaded from separate sources:</p> <pre><code>graph BT\n  accTitle: .vmfb file using parameters\n  accDescr {\n    Using parameters, .vmfb files contain host code, device code, small\n    constants, and parameters. External .irpa, .safetensors, and .gguf files\n    can be linked to these parameters.\n  }\n\n  subgraph VMFB[\".vmfb file using parameters\"]\n    HostCode(Host code)\n    DeviceCode(Device code)\n    SmallData(Small data)\n    Parameters(\"Parameters\n    \u2022 scope_1::key_1\n    \u2022 scope_1::key_2\n    \u2022 scope_2::key_1\n    \u2022 scope_2::key_2\")\n  end\n\n  subgraph IRPA[\".irpa file\"]\n    key_1\n    key_2\n  end\n\n  subgraph Safetensors[\".safetensors file\"]\n    key_1a[key_1]\n  end\n\n  subgraph GGUF[\".gguf file\"]\n    key_2a[key_2]\n  end\n\n  IRPA -. \"scope_1\" .-&gt; Parameters\n  Safetensors -. \"scope_2\" .-&gt; Parameters\n  GGUF -. \"scope_2\" .-&gt; Parameters</code></pre> <p>Note</p> <p>Notice that parameters are identified by a scope and a unique key within that scope, not strong references to specific file paths. 
Data from any supported file format or \"parameter index provider\" can be loaded.</p>"},{"location":"guides/parameters/#supported-formats","title":"Supported formats","text":""},{"location":"guides/parameters/#irpa","title":"IRPA","text":"<p>The IREE Parameter Archive (IRPA) file format (<code>iree/schemas/parameter_archive.h</code>) is IREE's own format optimized for deployment. Formats like GGUF and safetensors can be converted to IRPA.</p> <ul> <li>Data is always aligned in IRPA files for efficient loading</li> <li>IRPA files contain minimal metadata and are fully hermetic. Buffers are   stored as opaque byte range blobs, not as tensors with explicit types and   shapes</li> <li>For testing and benchmarking workflows, IRPA files may include a mix of real   data and splatted values (repeating patterns with no storage requirements on   disk)</li> </ul>"},{"location":"guides/parameters/#gguf","title":"GGUF","text":"<p>The GGUF format is used by the GGML project and other projects in that ecosystem like llama.cpp.</p> <ul> <li>GGUF files are non-hermetic - using them requires knowledge about the settings   used to compile GGML in order to interpret the contents of each file   (particularly for various quantization formats)</li> <li>GGUF files are aligned, so they should have matching performance with IRPA   files</li> </ul>"},{"location":"guides/parameters/#safetensors","title":"Safetensors","text":"<p>The safetensors format is used by the Hugging Face community.</p> <ul> <li>Safetensors files are not naturally aligned to support efficient loading, so   using them across runtime devices comes with (possibly severe) performance   penalties</li> </ul>"},{"location":"guides/parameters/#extensibility-and-other-formats","title":"Extensibility and other formats","text":"<p>The core IREE tools are written in C and aim to be simple and pragmatic, with minimal dependencies. 
Other formats could be converted into supported file types:</p> <ul> <li>PyTorch <code>.pt</code> and <code>.pth</code> files (serialized state dictionaries produced with   <code>torch.save</code>)</li> <li>TensorFlow checkpoint (<code>.ckpt</code>, <code>.h5</code>) files or SavedModel /   <code>model.keras</code>   archives (see the TensorFlow guide)</li> </ul> <p>In-tree formats for file-backed parameters are defined in the <code>iree/io/formats/</code> folder. Additional formats could be defined out-of-tree to make use of external libraries as needed.</p> <p>Parameter loading from memory (or a cache, or some other location) is possible by adding new providers implementing <code>iree_io_parameter_provider_t</code>. The default parameter index provider operates on files on local disk.</p>"},{"location":"guides/parameters/#working-with-parameter-files","title":"Working with parameter files","text":""},{"location":"guides/parameters/#creating-parameter-files","title":"Creating parameter files","text":"<p>The <code>iree-create-parameters</code> tool can create IREE Parameter Archive (.irpa) files. Each parameter in the archive can be created with either a real data value (taking up storage space in the final archive) or a splatted value (zeroed contents or a repeated value, taking up no storage space on disk).</p> Tip: <code>--help</code> output <p>For a detailed list of options, pass <code>--help</code>:</p> <pre><code>$ iree-create-parameters --help\n\n# ============================================================================\n# \ud83d\udc7b IREE: iree-create-parameters\n# ============================================================================\n\nCreates IREE Parameter Archive (.irpa) files. 
Provide zero or more\nparameter value declarations and an output file with\n`--output=file.irpa` to produce a new file with zeroed or patterned\ncontents.\n\n...\n</code></pre> <ul> <li> <p>Example creating a file with two zeroed embedded parameters and one with a   repeating pattern:</p> <pre><code>$ iree-create-parameters \\\n    --data=my.zeroed_param_1=4096xf32 \\\n    --data=my.zeroed_param_2=2x4096xi16 \\\n    --data=my.pattern_param_2=8x2xf32=2.1 \\\n    --output=output_with_storage.irpa\n</code></pre> </li> <li> <p>Example creating a file with splatted values (no storage on disk):</p> <pre><code>$ iree-create-parameters \\\n    --splat=my.splat_param_1=4096xf32=4.1 \\\n    --splat=my.splat_param_2=2x4096xi16=123 \\\n    --output=output_without_storage.irpa\n</code></pre> </li> </ul> <p>Parameter archives can also be created using IREE's Python bindings:</p> <pre><code>import iree.runtime as rt\nimport numpy as np\n\nparameter_index = rt.ParameterIndex()\nparameter_index.add_buffer(\"weight\", np.zeros([32, 16]) + 2.0)\nparameter_index.add_buffer(\"bias\", np.zeros([32, 16]) + 0.5)\nparameter_index.create_archive_file(\"parameters.irpa\")\n</code></pre> <p>See the <code>runtime/bindings/python/tests/io_test.py</code> file for more usage examples.</p>"},{"location":"guides/parameters/#converting-to-the-irpa-format","title":"Converting to the IRPA format","text":"<p>The <code>iree-convert-parameters</code> tool converts supported files into IREE Parameter Archives (.irpa) files.</p> Tip: <code>--help</code> output <p>For a detailed list of options, pass <code>--help</code>:</p> <pre><code>$ iree-convert-parameters --help\n\n# ============================================================================\n# \ud83d\udc7b IREE: iree-convert-parameters\n# ============================================================================\n\nConverts supported parameter file formats into IREE Parameter Archives\n(.irpa) files. 
Provide one or more input parameter files in the same\nform as expected by the iree-run-module tool (`--parameters=foo.gguf`)\nand an output file with `--output=file.irpa`.\n\n...\n</code></pre> <ul> <li> <p>Example converting from safetensors to IRPA:</p> <pre><code>$ iree-convert-parameters \\\n    --parameters=input.safetensors \\\n    --output=output.irpa\n</code></pre> </li> <li> <p>Example mutating parameters:</p> <pre><code>$ iree-convert-parameters \\\n    --parameters=a.gguf \\\n    --parameters=b.safetensors \\\n    --exclude=unneeded_param \\\n    --rename=old_name=new_name \\\n    --splat=some_name=f32=4.2 \\\n    --output=ab.irpa\n</code></pre> </li> <li> <p>Example stripping parameters and replacing them with zeros except for one   with special handling:</p> <pre><code>$ iree-convert-parameters \\\n    --parameters=input.irpa \\\n    --strip \\\n    --splat=special_param=f32=1.0 \\\n    --output=output.irpa\n</code></pre> </li> </ul>"},{"location":"guides/parameters/#inspecting-parameter-files","title":"Inspecting parameter files","text":"<p>The <code>iree-dump-parameters</code> tool outputs information about parsed parameter files.</p> Tip: <code>--help</code> output <p>For a detailed list of options, pass <code>--help</code>:</p> <pre><code>$ iree-dump-parameters --help\n\n# ============================================================================\n# \ud83d\udc7b IREE: iree-dump-parameters\n# ============================================================================\n\nDumps information about parsed parameter files.\n\n...\n</code></pre> <ul> <li> <p>Example listing all available parameters and their index information:</p> <pre><code>$ iree-dump-parameters \\\n    --parameters=my_scope=my_file.gguf \\\n    [--parameters=...]\n</code></pre> </li> <li> <p>Example extracting parameter binary contents from a file:</p> <pre><code>$ iree-dump-parameters ... 
\\\n    --extract=scope::key0=file0.bin \\\n    [--extract=...]\n</code></pre> </li> </ul>"},{"location":"guides/parameters/#loading-parameters-from-files","title":"Loading parameters from files","text":""},{"location":"guides/parameters/#on-the-command-line","title":"On the command line","text":"<p>IREE command line tooling can load parameter files alongside module files:</p> <pre><code>iree-run-module --module=program.vmfb --parameters=data.irpa ...\n</code></pre> <p>For concrete examples, see these test files:</p> <ul> <li><code>tools/test/parameters_scoped.mlir</code></li> <li><code>tools/test/parameters_unscoped.mlir</code></li> </ul>"},{"location":"guides/parameters/#from-python","title":"From Python","text":"<p>See the <code>runtime/bindings/python/tests/io_runtime_test.py</code> file for usage examples.</p>"},{"location":"guides/parameters/#using-the-c-api","title":"Using the C API","text":"<p>TODO: <code>iree_io_parameters_module_create()</code> sample code</p>"},{"location":"guides/deployment-configurations/","title":"Deployment configurations","text":"<p>IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE to load programs on demand and to take advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.</p>"},{"location":"guides/deployment-configurations/#stable-configurations","title":"Stable configurations","text":"<ul> <li> CPU for general   purpose CPU deployment</li> <li> CPU - Bare-Metal   with minimal platform dependencies</li> <li> GPU - Vulkan   for cross-platform usage and interop with graphics applications</li> <li> GPU - CUDA   for NVIDIA-specific solutions</li> <li> GPU - ROCm   for AMD-specific solutions</li> <li> GPU - Metal   for running on Apple hardware</li> </ul> <p>These are just the most stable configurations IREE supports. 
Feel free to reach out on any of IREE's communication channels if you have questions about a specific platform, hardware accelerator, or set of system features.</p>"},{"location":"guides/deployment-configurations/#compiler-target-backends","title":"Compiler target backends","text":"<p>Compiler target backends are used to generate executable code for hardware APIs and device architectures. Compiler targets may implement special optimizations or generate distinct code for certain device/architecture/performance profiles.</p> <p>When compiling programs, a list of target backends must be specified via</p> <ul> <li><code>--iree-hal-target-backends=</code> (command-line)</li> <li><code>target_backends=[...]</code> (Python)</li> </ul> Target backend Description Compatible HAL devices <code>llvm-cpu</code> Code generation for CPU-like devices supported by LLVM <code>local-sync</code>, <code>local-task</code> <code>vmvx</code> Portable interpreter powered by a microkernel library <code>local-sync</code>, <code>local-task</code> <code>vulkan-spirv</code> Portable GPU support via SPIR-V for Vulkan <code>vulkan</code> <code>cuda</code> NVIDIA GPU support via PTX for CUDA <code>cuda</code> <code>metal-spirv</code> GPU support on Apple platforms via MSL for Metal <code>metal</code> <code>rocm</code> Experimental  AMD GPU support via HSACO for ROCm <code>rocm</code> <code>webgpu-spirv</code> Experimental  GPU support on the Web via WGSL for WebGPU <code>webgpu</code> <p>Tip - listing available backends</p> <p>The list of compiler target backends can be queried:</p> Command-linePython bindings <pre><code>$ iree-compile --iree-hal-list-target-backends\n\nRegistered target backends:\n    cuda\n    llvm-cpu\n    metal\n    metal-spirv\n    rocm\n    vmvx\n    vmvx-inline\n    vulkan\n    vulkan-spirv\n</code></pre> <pre><code>iree.compiler.query_available_targets()\n\n['cuda',\n 'llvm-cpu',\n 'metal',\n 'metal-spirv',\n 'rocm',\n 'vmvx',\n 'vmvx-inline',\n 'vulkan',\n 
'vulkan-spirv']\n</code></pre>"},{"location":"guides/deployment-configurations/#runtime-hal-driversdevices","title":"Runtime HAL drivers/devices","text":"<p>Runtime HAL devices call into hardware APIs to load and run executable code. Devices may use multithreading or other system resources, depending on their focus and the build configuration.</p> HAL device Description <code>local-sync</code> Synchronous local CPU device with inline execution <code>local-task</code> Multithreaded local CPU device using a 'task' executor <code>vulkan</code> Portable GPU execution using the Vulkan API <code>cuda</code> NVIDIA GPU execution using CUDA <code>metal</code> GPU execution on Apple platforms using Metal <code>rocm</code> Experimental  AMD GPU execution using ROCm <code>webgpu</code> Experimental  GPU execution on the web using WebGPU <p>Additional HAL drivers can also be defined external to the core project via <code>IREE_EXTERNAL_HAL_DRIVERS</code>.</p>"},{"location":"guides/deployment-configurations/bare-metal/","title":"Running on a bare-metal platform","text":"<p>IREE supports model execution via CPU on bare-metal platforms. 
Bare metal platforms have no operating system support, and executables are built using machine-specific linker scripts and/or board support packages (BSPs).</p> <p>Bare-metal deployment typically uses IREE's LLVM compiler target backend much like the CPU configuration, but using a limited subset of IREE's CPU HAL driver code at runtime to load and execute compiled programs.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#prerequisites","title":"Prerequisites","text":"<p>Out-of-tree bare-metal platform tools and source code for the system should be ready, such as</p> <ul> <li>Compilation toolchain</li> <li>Platform linker script</li> <li>Firmware libraries</li> </ul> <p>Please follow the instructions to retrieve the IREE compiler.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compile-the-model-for-bare-metal","title":"Compile the model for bare-metal","text":"<p>The model can be compiled with the following command:</p> <pre><code>iree-compile \\\n    --iree-stream-partitioning-favor=min-peak-memory \\\n    --iree-hal-target-backends=llvm-cpu \\\n    --iree-llvmcpu-target-triple=x86_64-pc-linux-elf \\\n    --iree-llvmcpu-debug-symbols=false \\\n    samples/models/simple_abs.mlir \\\n    -o /tmp/simple_abs_cpu.vmfb\n</code></pre> <p>In which</p> <ul> <li><code>--iree-stream-partitioning-favor=min-peak-memory</code>: Optimize for minimum peak     memory usage at the cost of concurrency - include when targeting     single-threaded execution to reduce memory consumption.</li> <li><code>--iree-hal-target-backends=llvm-cpu</code>: Compile using the LLVM CPU target</li> <li><code>--iree-llvmcpu-target-triple</code>: Use the <code>&lt;arch&gt;-pc-linux-elf</code> LLVM target triple     so the artifact has a fixed ABI to be rendered by the     elf_module library</li> <li><code>--iree-llvmcpu-debug-symbols=false</code>: To reduce the artifact size</li> </ul> <p>See generate.sh for example command-line instructions 
of some common architectures.</p> <p>You can replace the MLIR file with the other MLIR model files, following the instructions.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compiling-the-bare-metal-model-for-static-library-support","title":"Compiling the bare-metal model for static-library support","text":"<p>See the static_library demo sample for an example and instructions on running a model with IREE's <code>static_library_loader</code>.</p> <p>By default, the demo targets the host machine when compiling. To produce a bare-metal compatible model, run <code>iree-compile</code> as in the previous example and add the additional <code>-iree-llvmcpu-static-library-output-path=</code> flag to specify the static library destination. This will produce a <code>.h\\.o</code> file to link directly into the target application.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#build-bare-metal-runtime-from-source","title":"Build bare-metal runtime from source","text":"<p>A few CMake options and macros should be set to build a subset of IREE runtime libraries compatible with the bare-metal platform. We assume there's no multi-thread control nor system library support in the bare-metal system. 
The model execution is in a single-thread synchronous fashion.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#set-cmake-options","title":"Set CMake options","text":"<pre><code># Build the IREE runtime only\nset(IREE_BUILD_COMPILER OFF)\n\n# Tell CMake to skip targeting a specific operating system\nset(CMAKE_SYSTEM_NAME Generic)\n\n# Disable multi-thread library support\nset(IREE_ENABLE_THREADING OFF)\n\n# Only enable the local synchronous HAL driver\nset(IREE_HAL_DRIVER_DEFAULTS OFF)\nset(IREE_HAL_DRIVER_LOCAL_SYNC ON)\n\n# Only enable some executable loaders\nset(IREE_HAL_EXECUTABLE_LOADER_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF ON)\nset(IREE_HAL_EXECUTABLE_LOADER_VMVX_MODULE ON)\n\n# Only enable the embedded ELF executable plugin\nset(IREE_HAL_EXECUTABLE_PLUGIN_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_PLUGIN_EMBEDDED_ELF ON)\n\n# Disable tests until IREE supports running them on bare-metal platforms\nset(IREE_BUILD_TESTS OFF)\n\n# Build samples\nset(IREE_BUILD_SAMPLES ON)\n</code></pre> <p>Todo</p> <p>Clean the list up after #6353 is fixed.</p> <p>Also, set the toolchain-specific cmake file to match the tool path, target architecture, target ABI, linker script, system library path, etc.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#define-iree-macros","title":"Define IREE macros","text":"<p>These macros should be defined, either in C/C++ or via CMake options like</p> <pre><code>set(MY_FLAGS \"-DIREE_PLATFORM_GENERIC=1\")\nset(CMAKE_C_FLAGS ${MY_FLAGS} ${CMAKE_C_FLAGS})\nset(CMAKE_CXX_FLAGS ${MY_FLAGS} ${CMAKE_CXX_FLAGS})\n</code></pre> Macro Description <code>IREE_PLATFORM_GENERIC</code> Let IREE build the runtime library without targeting a specific platform. <code>IREE_SYNCHRONIZATION_DISABLE_UNSAFE=1</code> Disable thread synchronization support. Must only be used if there's a single thread. <code>IREE_FILE_IO_ENABLE=0</code> Disable file I/O. 
<code>IREE_TIME_NOW_FN</code> A function to return the system time. For bare-metal systems, it can be set as <code>IREE_TIME_NOW_FN=\\\"\\{ return 0;\\}\\\"</code> as there's no asynchronous wait handling. <code>IREE_WAIT_UNTIL_FN</code> A function to wait until the given time in nanoseconds. Must match the signature <code>bool(uint64_t nanos)</code> and return false if the wait failed. <p>Examples of how to set up the CMakeLists.txt and .cmake file:</p> <ul> <li>IREE RISC-V toolchain cmake</li> <li>IREE Bare-Metal Arm Sample</li> <li>IREE Bare-Metal RV32 Sample</li> </ul>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#bare-metal-execution-example","title":"Bare-metal execution example","text":"<p>See simple_embedding for generic platform for an example of how to use the IREE runtime library to build/run the IREE model for the bare-metal target.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/","title":"CPU deployment","text":"<p>IREE supports efficient program execution on CPU devices by using LLVM to compile all dense computations in each program into highly optimized CPU native instruction streams, which are embedded in one of IREE's deployable formats.</p> <p>To compile a program for CPU execution, pick one of IREE's supported executable formats:</p> Executable Format Description embedded ELF portable, high performance dynamic library system library platform-specific dynamic library (.so, .dll, etc.) 
VMVX reference target <p>At runtime, CPU executables can be loaded using one of IREE's CPU HAL drivers:</p> <ul> <li><code>local-task</code>: asynchronous, multithreaded driver built on IREE's \"task\"    system</li> <li><code>local-sync</code>: synchronous, single-threaded driver that executes work inline</li> </ul> <p>Todo</p> <p>Add IREE's CPU support matrix: what architectures are supported; what architectures are well optimized; etc.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core <code>iree-compiler</code> package includes the LLVM-based CPU compiler:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install iree-compiler\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade iree-compiler\n</code></pre> <p>Tip</p> <p><code>iree-compile</code> is installed to your python module installation path. If you pip install with the user mode, it is under <code>${HOME}/.local/bin</code>, or <code>%APPDATA%Python</code> on Windows. 
You may want to include the path in your system's <code>PATH</code> environment variable:</p> <pre><code>export PATH=${HOME}/.local/bin:${PATH}\n</code></pre>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation or iOS cross-compilation page if you are cross compiling for a mobile device. The <code>llvm-cpu</code> compiler backend is compiled in by default on all platforms.</p> <p>Ensure that the <code>IREE_TARGET_BACKEND_LLVM_CPU</code> CMake option is <code>ON</code> when configuring for the host.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>You will need to get an IREE runtime that supports the local CPU HAL driver, along with the appropriate executable loaders for your application.</p> <p>You can check for CPU support by looking for the <code>local-sync</code> and <code>local-task</code> drivers:</p> <pre><code>$ iree-run-module --list_drivers\n\n        cuda: CUDA (dynamic)\n  local-sync: Local execution using a lightweight inline synchronous queue\n  local-task: Local execution using the IREE multithreading task system\n      vulkan: Vulkan 1.x (dynamic)\n</code></pre>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. 
The local CPU HAL drivers are compiled in by default on all platforms.</p> <p>Ensure that the <code>IREE_HAL_DRIVER_LOCAL_TASK</code> and <code>IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF</code> (or other executable loader) CMake options are <code>ON</code> when configuring for the target.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-and-run-a-program","title":"Compile and run a program","text":"<p>With the requirements out of the way, we can now compile a model and run it.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run the following command to compile with the <code>llvm-cpu</code> target:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=llvm-cpu \\\n    mobilenet_iree_input.mlir -o mobilenet_cpu.vmfb\n</code></pre> <p>Tip - CPU targets</p> <p>The <code>--iree-llvmcpu-target-triple</code> flag tells the compiler to generate code for a specific type of CPU. You can see the list of supported targets with <code>iree-compile --iree-llvmcpu-list-targets</code>, or pass \"host\" to let LLVM infer the triple from your host machine (e.g. 
<code>x86_64-linux-gnu</code>).</p> <pre><code>$ iree-compile --iree-llvmcpu-list-targets\n\n  Registered Targets:\n    aarch64    - AArch64 (little endian)\n    aarch64_32 - AArch64 (little endian ILP32)\n    aarch64_be - AArch64 (big endian)\n    arm        - ARM\n    arm64      - ARM64 (little endian)\n    arm64_32   - ARM64 (little endian ILP32)\n    armeb      - ARM (big endian)\n    riscv32    - 32-bit RISC-V\n    riscv64    - 64-bit RISC-V\n    wasm32     - WebAssembly 32-bit\n    wasm64     - WebAssembly 64-bit\n    x86        - 32-bit X86: Pentium-Pro and above\n    x86-64     - 64-bit X86: EM64T and AMD64\n</code></pre> <p>Tip - CPU features</p> <p>The <code>--iree-llvmcpu-target-cpu-features</code> flag tells the compiler to generate code using certain CPU \"features\", like SIMD instruction sets. Like the target triple, you can pass \"host\" to this flag to let LLVM infer the features supported by your host machine.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#run-a-compiled-program","title":"Run a compiled program","text":"<p>In the build directory, run the following command:</p> <pre><code>tools/iree-run-module \\\n    --device=local-task \\\n    --module=mobilenet_cpu.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/gpu-cuda/","title":"GPU deployment using CUDA","text":"<p>IREE can accelerate model execution on Nvidia GPUs using CUDA.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#prerequisites","title":"Prerequisites","text":"<p>In order to use CUDA to drive the GPU, you need to have a functional CUDA environment. 
It can be verified by the following steps:</p> <pre><code>nvidia-smi | grep CUDA\n</code></pre> <p>If <code>nvidia-smi</code> does not exist, you will need to install the latest CUDA Toolkit SDK.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core <code>iree-compiler</code> package includes the CUDA compiler:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install iree-compiler\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade iree-compiler\n</code></pre> <p>Tip</p> <p><code>iree-compile</code> is installed to your python module installation path. If you pip install with the user mode, it is under <code>${HOME}/.local/bin</code>, or <code>%APPDATA%Python</code> on Windows. You may want to include the path in your system's <code>PATH</code> environment variable:</p> <pre><code>export PATH=${HOME}/.local/bin:${PATH}\n</code></pre>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build the IREE compiler, then enable the CUDA compiler target with the <code>IREE_TARGET_BACKEND_CUDA</code> option.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. 
You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>Next you will need to get an IREE runtime that includes the CUDA HAL driver.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE from source, then enable the CUDA HAL driver with the <code>IREE_HAL_DRIVER_CUDA</code> option.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"<p>With the compiler and runtime ready, we can now compile programs and run them on GPUs.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run one of the following commands to compile:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=cuda \\\n    --iree-hal-cuda-llvm-target-arch=&lt;...&gt; \\\n    mobilenet_iree_input.mlir -o mobilenet_cuda.vmfb\n</code></pre> <p>Note that a cuda target architecture (<code>iree-hal-cuda-llvm-target-arch</code>) of the form <code>sm_&lt;arch_number&gt;</code> is needed to compile towards each GPU architecture. 
If no architecture is specified then we will default to <code>sm_35</code>.</p> <p>Here is a table of commonly used architectures:</p> CUDA GPU Target Architecture Nvidia K80 <code>sm_35</code> Nvidia P100 <code>sm_60</code> Nvidia V100 <code>sm_70</code> Nvidia A100 <code>sm_80</code>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#run-a-compiled-program","title":"Run a compiled program","text":"<p>Run the following command:</p> <pre><code>iree-run-module \\\n    --device=cuda \\\n    --module=mobilenet_cuda.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-metal/","title":"GPU deployment using Metal","text":"<p>Documentation coming soon!</p>","tags":["GPU","iOS"]},{"location":"guides/deployment-configurations/gpu-rocm/","title":"GPU deployment using ROCm","text":"<p>IREE can accelerate model execution on AMD GPUs using ROCm.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#prerequisites","title":"Prerequisites","text":"<p>In order to use ROCm to drive the GPU, you need to have a functional ROCm environment. 
It can be verified by the following steps:</p> <pre><code>rocm-smi | grep rocm\n</code></pre> <p>If <code>rocm-smi</code> does not exist, you will need to install the latest ROCm Toolkit SDK for Windows or Linux.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Currently ROCm is NOT supported for the Python interface.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build the IREE compiler, then enable the ROCm compiler target with the <code>IREE_TARGET_BACKEND_ROCM</code> option.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. 
You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>Next you will need to get an IREE runtime that includes the ROCm HAL driver.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE from source, then enable the experimental ROCm HAL driver with the <code>IREE_EXTERNAL_HAL_DRIVERS=rocm</code> option.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"<p>With the compiler and runtime ready, we can now compile programs and run them on GPUs.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run one of the following commands to compile:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=rocm \\\n    --iree-rocm-target-chip=&lt;...&gt; \\\n    mobilenet_iree_input.mlir -o mobilenet_rocm.vmfb\n</code></pre> <p>Note that IREE comes with bundled bitcode files, which are used for linking certain intrinsics on AMD GPUs. These will be used automatically when the <code>--iree-rocm-bc-dir</code> flag is unset or empty. 
As additional support may be needed for different chips, users can use this flag to point to an explicit directory. For example, in ROCm installations on Linux, this is often found under <code>/opt/rocm/amdgcn/bitcode</code>.</p> <p>Note that a ROCm target chip (<code>iree-rocm-target-chip</code>) of the form <code>gfx&lt;arch_number&gt;</code> is needed to compile towards each GPU architecture. If no architecture is specified then we will default to <code>gfx908</code>.</p> <p>Here is a table of commonly used architectures:</p> AMD GPU Target Chip AMD MI25 <code>gfx900</code> AMD MI50 <code>gfx906</code> AMD MI60 <code>gfx906</code> AMD MI100 <code>gfx908</code> AMD MI300A <code>gfx940</code> AMD MI300 <code>gfx942</code>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#run-a-compiled-program","title":"Run a compiled program","text":"<p>Run the following command:</p> <pre><code>iree-run-module \\\n    --device=rocm \\\n    --module=mobilenet_rocm.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-vulkan/","title":"GPU deployment using Vulkan","text":"<p>IREE can accelerate model execution on GPUs via Vulkan, a low-overhead graphics and compute API. Vulkan is cross-platform: it is available on many operating systems, including Android, Linux, and Windows. 
Vulkan is also cross-vendor: it is supported by most GPU vendors, including AMD, ARM, Intel, NVIDIA, and Qualcomm.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#support-matrix","title":"Support matrix","text":"<p>As IREE and the compiler ecosystem it operates within matures, more target specific optimizations will be implemented. At this stage, expect reasonable performance across all GPUs and for improvements to be made over time for specific vendors and architectures.</p> GPU Vendor Category Performance Focus Architecture ARM Mali GPU Mobile Good Valhall+ Qualcomm Adreno GPU Mobile Reasonable 640+ AMD GPU Desktop/server Good RDNA+ NVIDIA GPU Desktop/server Good Turing+","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#prerequisites","title":"Prerequisites","text":"<p>In order to use Vulkan to drive the GPU, you need to have a functional Vulkan environment. IREE requires Vulkan 1.1 on Android and 1.2 elsewhere. It can be verified by the following steps:</p> Android Linux Windows <p>Android mandates Vulkan 1.1 support since Android 10. You just need to make sure the device's Android version is 10 or higher.</p> <p>Run the following command in a shell:</p> <pre><code>vulkaninfo | grep apiVersion\n</code></pre> <p>If <code>vulkaninfo</code> does not exist, you will need to install the latest Vulkan SDK. 
Installing via LunarG's package repository is recommended, as it places Vulkan libraries and tools under system paths so it's easy to discover.</p> <p>If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.</p> <p>Run the following command in a shell:</p> <pre><code>vulkaninfo | grep apiVersion\n</code></pre> <p>If <code>vulkaninfo</code> does not exist, you will need to install the latest Vulkan SDK.</p> <p>If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-compiler","title":"Get the IREE compiler","text":"<p>Vulkan expects the program running on GPU to be expressed by the SPIR-V binary exchange format, which the model must be compiled into.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core <code>iree-compiler</code> package includes the SPIR-V compiler:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install iree-compiler\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade iree-compiler\n</code></pre> <p>Tip</p> <p><code>iree-compile</code> is installed to your python module installation path. If you pip install with the user mode, it is under <code>${HOME}/.local/bin</code>, or <code>%APPDATA%Python</code> on Windows. 
You may want to include the path in your system's <code>PATH</code> environment variable:</p> <pre><code>export PATH=${HOME}/.local/bin:${PATH}\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. The SPIR-V compiler backend is compiled in by default on all platforms.</p> <p>Ensure that the <code>IREE_TARGET_BACKEND_VULKAN_SPIRV</code> CMake option is <code>ON</code> when configuring for the host.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>Next you will need to get an IREE runtime that supports the Vulkan HAL driver.</p> <p>You can check for Vulkan support by looking for a matching driver and device:</p> <pre><code>$ iree-run-module --list_drivers\n\n        cuda: CUDA (dynamic)\n  local-sync: Local execution using a lightweight inline synchronous queue\n  local-task: Local execution using the IREE multithreading task system\n      vulkan: Vulkan 1.x (dynamic)\n</code></pre> <pre><code>$ iree-run-module --list_devices\n\n  cuda://GPU-00000000-1111-2222-3333-444444444444\n  local-sync://\n  local-task://\n  vulkan://00000000-1111-2222-3333-444444444444\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for Linux/Windows and the Android cross-compilation page for 
Android. The Vulkan HAL driver is compiled in by default on non-Apple platforms.</p> <p>Ensure that the <code>IREE_HAL_DRIVER_VULKAN</code> CMake option is <code>ON</code> when configuring for the target.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-and-run-a-program","title":"Compile and run a program","text":"<p>With the SPIR-V compiler and Vulkan runtime, we can now compile programs and run them on GPUs.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. 
Then run the following command to compile with the <code>vulkan-spirv</code> target:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=vulkan-spirv \\\n    --iree-vulkan-target-triple=&lt;...&gt; \\\n    mobilenet_iree_input.mlir -o mobilenet_vulkan.vmfb\n</code></pre> <p>Note</p> <p>Currently a target triple of the form <code>&lt;vendor/arch&gt;-&lt;product&gt;-&lt;os&gt;</code> is needed to compile towards a specific GPU architecture.</p> <p>We don't support the full spectrum here(1); the following table summarizes the currently recognized ones.</p> <p>If no triple is specified, then a safe but more limited default will be used.</p> <p>This is more of a mechanism to help us develop IREE itself--in the long term we want to perform multiple targeting to generate code for multiple architectures if no target triple is given.</p> <ol> <li>It's also impossible to capture all details of a Vulkan implementation    with a target triple, given the allowed variances on extensions, properties,    limits, etc. So the target triple is just an approximation for usage.</li> </ol> GPU Vendor Target Triple ARM Mali GPU e.g. <code>valhall-unknown-{android30|android31}</code> Qualcomm Adreno GPU e.g. <code>adreno-unknown-{android30|android31}</code> AMD GPU e.g. <code>{rdna1|rdna2|rdna3}-unknown-unknown</code> NVIDIA GPU e.g. <code>{turing|ampere}-unknown-unknown</code> SwiftShader CPU <code>cpu-swiftshader-unknown</code>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#run-a-compiled-program","title":"Run a compiled program","text":"<p>In the build directory, run the following command:</p> <pre><code>tools/iree-run-module \\\n    --device=vulkan \\\n    --module=mobilenet_vulkan.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. 
We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["GPU","Vulkan"]},{"location":"guides/ml-frameworks/","title":"ML frameworks","text":"<p>IREE supports popular machine learning frameworks using the same underlying technology.</p> <pre><code>graph LR\n  accTitle: ML framework to runtime deployment workflow overview\n  accDescr {\n    Programs start in some ML framework.\n    Programs are imported into MLIR.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  A[ML frameworks]\n  B[Imported MLIR]\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>"},{"location":"guides/ml-frameworks/#supported-frameworks","title":"Supported frameworks","text":"<p>See guides on how to use each framework with IREE:</p> <ul> <li> JAX</li> <li> ONNX</li> <li> PyTorch</li> <li> TensorFlow and    TensorFlow Lite</li> </ul>"},{"location":"guides/ml-frameworks/#samples","title":"Samples","text":"<p>Check out the samples in IREE's <code>samples/</code> directory, as well as the iree-experimental repository.</p>"},{"location":"guides/ml-frameworks/#exportimport","title":"Export/Import","text":"<p>Each machine learning framework has some \"export\" mechanism that snapshots the structure and data in your program. These exported programs can then be \"imported\" into IREE's compiler by using either a stable import format or one of IREE's importer tools.</p> <p>This export/import process is specific to each frontend and typically involves a number of stages:</p> <ol> <li>Capture/trace/freeze the ML model into a graph</li> <li>Write that graph to an interchange format (e.g. 
SavedModel, TorchScript,    ONNX)</li> <li>Load the saved program into an import tool and convert to MLIR</li> <li>Legalize the graph's operations so only IREE-compatible operations remain</li> <li>Write the imported MLIR to a file</li> </ol> <p>This fully imported form can then be compiled independently of the source language and framework.</p>"},{"location":"guides/ml-frameworks/#compilation","title":"Compilation","text":"<p>IREE compiles MLIR files for specified sets of backends (CPU, GPU, etc). Each backend generates optimized native code custom to the input program and intended target platform. Once compiled, modules can be executed using IREE's runtime.</p> <p>See the deployment configuration guides for details on selecting a compiler backend and tuning options for your choice of target platform(s) or device(s).</p>"},{"location":"guides/ml-frameworks/#execution","title":"Execution","text":"<p>Compiled modules can be executed by selecting what compute devices to use, loading the module, and then executing it with the intended inputs. IREE provides several language bindings for its runtime API.</p>"},{"location":"guides/ml-frameworks/jax/","title":"JAX integration","text":"<p>Note</p> <p>IREE's JAX support is under development. This page is still under construction.</p>","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/jax/#overview","title":"Overview","text":"<p>IREE offers two ways to interface with JAX programs:</p> <ul> <li>An API for extracting and compiling full models ahead of time (AOT) for   execution apart from JAX. This API is being developed in the   iree-org/iree-jax repository.</li> <li>A PJRT plugin that adapts IREE as a native JAX backend for online / just in   time (JIT) use. 
This plugin is being developed in the   <code>integrations/pjrt</code> directory.</li> </ul>","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/onnx/","title":"ONNX support","text":"<p>Caution - under development</p> <p>Support for a broad set of ONNX operators and data types is an active investment area. See the ONNX Op Support tracking issue for the latest status.</p>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#overview","title":"Overview","text":"<p>Machine learning models using the Open Neural Network Exchange (ONNX) format can be deployed using the IREE compiler and runtime:</p> <pre><code>graph LR\n  accTitle: ONNX to runtime deployment workflow overview\n  accDescr {\n    Programs start as ONNX protobufs.\n    Programs are imported into MLIR using iree-import-onnx.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  A[\"ONNX\\n(protobuf)\"]\n  B[\"MLIR\\n(torch-mlir)\"]\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A -- iree-import-onnx --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#prerequisites","title":"Prerequisites","text":"<ol> <li> <p>Install ONNX:</p> <pre><code>python -m pip install onnx\n</code></pre> </li> <li> <p>Install IREE packages, either by     building from source     or from pip:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler[onnx] \\\n  iree-runtime\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler[onnx] \\\n  iree-runtime\n</code></pre> </li> </ol>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#quickstart","title":"Quickstart","text":"<ol> <li> <p>Start with a <code>.onnx</code> protobuf 
file, such as a model from    https://github.com/onnx/models.</p> </li> <li> <p>Convert the <code>.onnx</code> file into MLIR using the <code>iree-import-onnx</code> tool:</p> <pre><code>iree-import-onnx [model.onnx] -o [model.mlir]\n</code></pre> <p>This tool produces a MLIR file with the help of the torch-mlir project.</p> </li> <li> <p>Once imported, the standard set of tools and APIs available for any of    IREE's deployment configurations and    API bindings can be used:</p> <pre><code>iree-compile \\\n  model.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  -o model_cpu.vmfb\n\niree-run-module \\\n  model_cpu.vmfb \\\n  --device=local-task \\\n  --entry_function=... \\\n  --input=... \\\n  ...\n</code></pre> </li> </ol>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#samples","title":"Samples","text":"Code samples Curated op and model tests SHARK-TestSuite <code>e2eshark/onnx</code> Generated op tests SHARK-TestSuite <code>iree_tests/onnx</code> Importer tests torch-mlir <code>test/python/onnx_importer</code>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#troubleshooting","title":"Troubleshooting","text":"","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#failed-to-legalize-operation-that-was-explicitly-marked-illegal","title":"Failed to legalize operation that was explicitly marked illegal","text":"<p>If you see an error compiling a converted .mlir file like this:</p> <pre><code>$ iree-compile model.mlir --iree-hal-target-backends=llvm-cpu -o model.vmfb\n\nmodel.mlir:507:12: error: failed to legalize operation 'torch.operator' that was explicitly marked illegal\n    %503 = torch.operator \"onnx.Identity\"(%arg0) : (!torch.vtensor&lt;[?],si64&gt;) -&gt; !torch.vtensor&lt;[?],si64&gt;\n           ^\n</code></pre> <p>There are several possible scenarios:</p> <ol> <li>The operator is not implemented, or the implementation is missing a case.    
Search for a matching issue in one of these places:<ul> <li>https://github.com/llvm/torch-mlir/issues</li> <li>https://github.com/nod-ai/SHARK-Turbine/issues</li> </ul> </li> <li> <p>The operator is implemented but only for a more recent ONNX version. You can    try upgrading your .onnx file using the    ONNX Version Converter:</p> convert_onnx_model.py<pre><code>import onnx\noriginal_model = onnx.load_model(\"model.onnx\")\nconverted_model = onnx.version_converter.convert_version(original_model, 17)\nonnx.save(converted_model, \"model_17.onnx\")\n</code></pre> <p>and then attempting the convert -&gt; compile again:</p> <pre><code>iree-import-onnx model_17.onnx -o model_17.mlir\niree-compile model_17.mlir ...\n</code></pre> </li> </ol>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/","title":"PyTorch + IREE =","text":"<p>Caution - under development</p> <p>We are still validating and fixing specific models. Between bug fixes in flight and releases running behind, we don't expect that you will be able to do a lot of advanced things without using nightly releases or working with us.</p> <p>Stay tuned and join the discussion in our Discord server's <code>#pytorch</code> channel.</p>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#overview","title":"Overview","text":"<p>iree-turbine (rebrand pending from SHARK-Turbine) offers a tight integration between compatible versions of IREE, torch-mlir, and PyTorch.</p> <ul> <li> Seamless integration with standard PyTorch workflows</li> <li> Deployment support for running PyTorch models on cloud and edge devices</li> <li> General purpose model compilation and execution tools</li> </ul> <p>Both just-in-time (JIT) and ahead-of-time (AOT) workflows are supported:</p> <pre><code>graph LR\n  accTitle: PyTorch integration overview\n  accDescr {\n    PyTorch programs can be optimized within a Python session with\n    iree-turbine's just-in-time tools.\n    PyTorch programs can be 
exported out of Python to native binaries using\n    iree-turbine's ahead-of-time export toolkit.\n  }\n\n  subgraph Python\n    pytorch(PyTorch)\n    subgraph turbine [iree-turbine]\n      jit(\"Eager execution (JIT)\")\n      aot(\"Export toolkit (AOT)\")\n    end\n\n    pytorch --&gt; jit\n    jit --&gt; pytorch\n    pytorch --&gt; aot\n  end\n\n  subgraph Native\n    binary([\"binary (.vmfb)\"])\n  end\n\n  aot -.-&gt; binary</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#prerequisites","title":"Prerequisites","text":"<p>Install a recent version of PyTorch (<code>2.3.0+</code>, prerelease as of April 2024):</p> <pre><code>python -m pip install \\\n  --pre --index-url https://download.pytorch.org/whl/test/cpu torch==2.3.0\n</code></pre> <p>Install iree-turbine:</p> <pre><code>python -m pip install iree-turbine\n</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#just-in-time-jit-execution","title":"Just-in-time (JIT) execution","text":"<p>Just-in-time integration allows for Python code using TorchDynamo to optimize PyTorch models/functions using IREE, all within an interactive Python session.</p> <pre><code>graph TD\n  accTitle: PyTorch JIT workflow overview\n  accDescr {\n    Programs start as either PyTorch nn.Module objects or callable functions.\n    Programs are compiled into optimized modules using torch.compile.\n    Within torch.compile, Dynamo runs the program through Turbine and IREE.\n  }\n\n  subgraph Python\n    input([nn.Module / function])\n\n    subgraph compile [\"torch.compile()\"]\n      direction LR\n      dynamo{{TorchDynamo}}\n      turbine{{iree-turbine}}\n      iree{{IREE}}\n      dynamo --&gt; turbine --&gt; iree\n    end\n\n    output([Optimized module])\n    input --&gt; compile --&gt; output\n  end</code></pre> <p>For deployment outside of Python, see the ahead-of-time sections 
below.</p>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#quickstart","title":"Quickstart","text":"<p>Turbine integrates into PyTorch as a custom backend for <code>torch.compile</code>.</p> <p>Behind the scenes, PyTorch captures the structure of the input model into a computation graph and feeds that graph through to the selected backend compiler.</p> <pre><code>import torch\n\n# Define the `nn.Module` or Python function to run.\nclass LinearModule(torch.nn.Module):\n  def __init__(self, in_features, out_features):\n    super().__init__()\n    self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n    self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n  def forward(self, input):\n    return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Compile the program using the turbine backend.(1)\nopt_linear_module = torch.compile(linear_module, backend=\"turbine_cpu\")\n\n# Use the compiled program as you would the original program.\nargs = torch.randn(4)\nturbine_output = opt_linear_module(args)\n</code></pre> <ol> <li>Initial integration only supports CPU, but support for many of IREE's other    targets is coming soon.</li> </ol>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples","title":"Samples","text":"Code samples JIT compilation notebook Simple MLP eager <code>core/examples/eager_mlp/mlp_eager_simple.py</code>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#ahead-of-time-aot-export","title":"Ahead-of-time (AOT) export","text":"<p>The ahead-of-time toolkit allows developers to define a program's structure in Python and then export deployment-ready artifacts that can be used in IREE's deployment configurations via the API bindings.</p>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#simple-api","title":"Simple API","text":"<p>For simple models, a one-shot export API is available.</p> <pre><code>graph LR\n  
accTitle: PyTorch simple AOT workflow overview\n  accDescr {\n    Programs start as PyTorch nn.Module objects.\n    Modules are exported using the \"aot\" API.\n    Exported outputs are then compiled to .vmfb files with executable binaries.\n  }\n\n  subgraph Python\n    input([nn.Module])\n    export([\"ExportOutput (MLIR)\"])\n    input -- \"aot.export()\" --&gt; export\n  end\n\n  subgraph Native\n    binary([\"binary (.vmfb)\"])\n  end\n\n  export -. \"compile()\" .-&gt; binary</code></pre> <pre><code>import iree.runtime as ireert\nimport numpy as np\nimport shark_turbine.aot as aot\nimport torch\n\n# Define the `nn.Module` to export.\nclass LinearModule(torch.nn.Module):\n  def __init__(self, in_features, out_features):\n    super().__init__()\n    self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n    self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n  def forward(self, input):\n    return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Export the program using the simple API.\nexample_arg = torch.randn(4)\nexport_output = aot.export(linear_module, example_arg)\n\n# Compile to a deployable artifact.\nbinary = export_output.compile(save_to=None)\n\n# Use the IREE runtime API to test the compiled program.\nconfig = ireert.Config(\"local-task\")\nvm_module = ireert.load_vm_module(\n    ireert.VmModule.wrap_buffer(config.vm_instance, binary.map_memory()),\n    config,\n)\ninput = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)\nresult = vm_module.main(input)\nprint(result.to_host())\n</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_1","title":"Samples","text":"Code samples Simple AOT export notebook Import Whisper from  Hugging Face notebook Simple MLP export <code>core/examples/aot_mlp/mlp_export_simple.py</code>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#advanced-api","title":"Advanced API","text":"<p>For more complex 
models, an underlying advanced API is available that gives access to more features.</p> <pre><code>graph LR\n  accTitle: PyTorch advanced AOT workflow overview\n  accDescr {\n    Programs are represented using the aot.CompiledModule class.\n    CompiledModules can extend nn.Module objects, export globals, and set\n    shapes and dtypes for each function.\n    Modules are exported using the \"aot\" API.\n    Exported outputs are then compiled to .vmfb files with executable binaries.\n  }\n\n  subgraph Python\n    compiledmodule(\"aot.CompiledModule\\n\\n- extend nn.Module\\n- export globals\\n- set shapes/dtypes\")\n    export([\"ExportOutput (MLIR)\"])\n    compiledmodule -- \"aot.export()\" --&gt; export\n  end\n\n  subgraph Native\n    binary([\"binary (.vmfb)\"])\n  end\n\n  export -. \"compile()\" .-&gt; binary</code></pre> <p>Advanced export workflows can use the <code>aot.CompiledModule</code> class to define and constrain the structure of a program prior to compiling it.</p> <pre><code>import shark_turbine.aot as aot\n\n# A minimal program, with no functions or variables.\nclass BasicModule(aot.CompiledModule):\n  ...\n\n# Create an instance of the program and convert it to MLIR.\nfrom iree.compiler.ir import Context\ninstance = BasicModule(context=Context())\nmodule_str = str(aot.CompiledModule.get_mlir_module(instance))\n\nprint(module_str)\n# module @basic {\n# }\n</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#exporting-functions","title":"Exporting functions","text":"<p>Exported functions are the API entry points into a compiled program.</p> <p>Simple feed-forward neural networks used for inference may have a single exported function (typically called \"forward\"), while more complex programs can have multiple computation functions, initialization functions, \"backward\" methods for training, state management functions, debugging functions, etc.</p> <ul> <li> <p>Each instance method on a 
<code>aot.CompiledModule</code>-derived class is exported.   These instance methods can include calls to other <code>aot</code> components, such as   <code>aot.jittable</code> compute functions:</p> <pre><code>class GetOnesModule(aot.CompiledModule):\n  @aot.jittable\n  def compute_ones():\n    return torch.ones(3)\n\n  def get_ones(self):\n    return self.compute_ones()\n</code></pre> </li> <li> <p>Instance methods can use <code>aot.AbstractTensor</code> to specify data types:</p> <pre><code>class IntSumModule(aot.CompiledModule):\n  @aot.jittable\n  def compute_sum(a, b):\n    return a + b\n\n  def sum_int32(\n    self,\n    a=aot.AbstractTensor(2, dtype=torch.int32),\n    b=aot.AbstractTensor(2, dtype=torch.int32),\n  ):\n    return self.compute_sum(a, b)\n</code></pre> </li> <li> <p>Shapes can be made dynamic using <code>aot.AbstractTensor</code> and <code>aot.jittable</code>   constraints:</p> <pre><code>class DynamicSumModule(aot.CompiledModule):\n  @aot.jittable\n  def compute_sum(a, b):\n    return a + b\n\n  def sum_dynamic(\n    self,\n    a=aot.AbstractTensor(None),\n    b=aot.AbstractTensor(None),\n  ):\n    return self.compute_sum(\n        a,\n        b,\n        constraints=[\n            a.dynamic_dim(0) == b.dynamic_dim(0),\n        ],\n    )\n</code></pre> </li> </ul>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#global-variables","title":"Global variables","text":"<p>Global variables are used to represent persistent state within a program instance.</p> <p>For example, they can be used to represent the weights and biases in a neural network, and exporting these as mutable variables can allow for setting their values independently at runtime.</p> <ul> <li> <p>Individual globals can be exported using <code>aot.export_global()</code>:</p> <pre><code>state_example = torch.zeros([1], dtype=torch.int32)\n\nclass SampleModule(aot.CompiledModule):\n  value = aot.export_global(state_example, mutable=True)\n\n  def 
get_value(self):\n    return self.value\n\n  def update_value(self, new_value=aot.abstractify(value)):\n    self.value = new_value\n</code></pre> </li> <li> <p>All named parameters on a <code>nn.Module</code> can be exported using   <code>export_parameters()</code>:</p> <pre><code>class SimpleParams(torch.nn.Module):\n  def __init__(self):\n    super().__init__()\n    self.classifier = torch.nn.Linear(20, 30)\n\n  def forward(self, x):\n    return self.classifier(x)\n\nm = SimpleParams()\n\nclass SimpleParamsModule(aot.CompiledModule):\n  params = aot.export_parameters(m)\n  compute = aot.jittable(m.forward)\n\n  def run(self, x=aot.AbstractTensor(128, 20)):\n    return self.compute(x)\n\n  # torch.nn.Linear has 'weight' and 'bias' variables:\n  #   https://pytorch.org/docs/stable/generated/torch.nn.Linear.html\n  # Add getters for both exported parameters.\n\n  def get_weight(self):\n    return self.params[\"classifier.weight\"]\n\n  def get_bias(self):\n    return self.params[\"classifier.bias\"]\n</code></pre> </li> </ul>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_2","title":"Samples","text":"Code samples Advanced AOT export notebook PyTorch dynamic shapes notebook AOT unit tests <code>core/tests/aot/</code> Dynamic MLP export <code>core/examples/aot_mlp/mlp_export_dynamic.py</code> stateless llama2 <code>models/turbine_models/custom_models/stateless_llama.py</code>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/tensorflow/","title":"TensorFlow integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#overview","title":"Overview","text":"<p>IREE supports compiling and running TensorFlow programs represented as <code>tf.Module</code> classes or stored in the <code>SavedModel</code> format.</p> <pre><code>graph LR\n  accTitle: TensorFlow to runtime deployment workflow overview\n  accDescr {\n    Programs start as either TensorFlow SavedModel or tf.Module programs.\n   
 Programs are imported into MLIR as StableHLO.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  subgraph A[TensorFlow]\n    direction TB\n    A1[SavedModel]\n    A2[tf.Module]\n\n    A1 --- A2\n  end\n\n  subgraph B[MLIR]\n    B1[StableHLO]\n  end\n\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A -- iree-import-tf --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#prerequisites","title":"Prerequisites","text":"<ol> <li> <p>Install TensorFlow by following the     official documentation:</p> <pre><code>python -m pip install tf-nightly\n</code></pre> </li> <li> <p>Install IREE packages, either by     building from source     or from pip:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tf\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tf\n</code></pre> </li> </ol>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#importing-models","title":"Importing models","text":"<p>IREE compilers transform a model into its final deployable format in several sequential steps. 
The first step for a TensorFlow model is to use either the <code>iree-import-tf</code> command-line tool or IREE's Python APIs to import the model into a format (i.e., MLIR) compatible with the generic IREE compilers.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#from-savedmodel-on-tensorflow-hub","title":"From SavedModel on TensorFlow Hub","text":"<p>IREE supports importing and using SavedModels from TensorFlow Hub.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#using-the-command-line-tool","title":"Using the command-line tool","text":"<p>First download the SavedModel and load it to get the serving signature, which is used as the entry point for IREE compilation flow:</p> <pre><code>import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\nprint(list(loaded_model.signatures.keys()))\n</code></pre> <p>Note</p> <p>If there are no serving signatures in the original SavedModel, you may add them by yourself by following \"Missing serving signature in SavedModel\".</p> <p>Then you can import the model with <code>iree-import-tf</code>. You can read the options supported via <code>iree-import-tf --help</code>. Using MobileNet v2 as an example and assuming the serving signature is <code>predict</code>:</p> <pre><code>iree-import-tf \\\n  --tf-import-type=savedmodel_v1 \\\n  --tf-savedmodel-exported-names=predict \\\n  /path/to/savedmodel -o iree_input.mlir\n</code></pre> <p>Tip</p> <p><code>iree-import-tf</code> is installed as <code>/path/to/python/site-packages/iree/tools/tf/iree-import-tf</code>. You can find out the full path to the <code>site-packages</code> directory via the <code>python -m site</code> command.</p> <p>Tip</p> <p><code>--tf-import-type</code> needs to match the SavedModel version. 
You can try both v1 and v2 if you see one of them gives an empty dump.</p> <p>Next, you can compile the model in <code>iree_input.mlir</code> for one of IREE's supported targets by following one of the deployment configuration guides.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#samples","title":"Samples","text":"Colab notebooks Training an MNIST digits classifier Edge detection Pretrained ResNet50 inference TensorFlow Hub import <p>End-to-end execution tests can be found in IREE's integrations/tensorflow/e2e/ directory.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#troubleshooting","title":"Troubleshooting","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#missing-serving-signature-in-savedmodel","title":"Missing serving signature in SavedModel","text":"<p>Sometimes SavedModels are exported without explicit serving signatures. This happens by default for TensorFlow Hub SavedModels. However, serving signatures are required as entry points for IREE compilation flow. You can use Python to load and re-export the SavedModel to give it serving signatures. 
For example, for MobileNet v2, assuming we want the serving signature to be <code>predict</code> and operating on a 224x224 RGB image:</p> <pre><code>import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\ncall = loaded_model.__call__.get_concrete_function(\n         tf.TensorSpec([1, 224, 224, 3], tf.float32))\nsignatures = {'predict': call}\ntf.saved_model.save(loaded_model,\n  '/path/to/resaved/model/', signatures=signatures)\n</code></pre> <p>The above will create a new SavedModel with a serving signature, <code>predict</code>, and save it to <code>/path/to/resaved/model/</code>.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/","title":"TensorFlow Lite integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#overview","title":"Overview","text":"<p>IREE supports compiling and running TensorFlow Lite (TFLite) programs stored as TFLite FlatBuffers. These files can be imported into an IREE-compatible format then compiled to a series of backends.</p> <pre><code>graph LR\n  accTitle: TFLite to runtime deployment workflow overview\n  accDescr {\n    Programs start as TensorFlow Lite FlatBuffers.\n    Programs are imported into MLIR's TOSA dialect using iree-import-tflite.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  subgraph A[TFLite]\n    A1[FlatBuffer]\n  end\n\n  subgraph B[MLIR]\n    B1[TOSA]\n  end\n\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A -- iree-import-tflite --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#prerequisites","title":"Prerequisites","text":"<ol> <li> <p>Install TensorFlow by following the     official documentation:</p> <pre><code>python -m pip install tf-nightly\n</code></pre> </li> <li> <p>Install IREE packages, either by     building from source     or from pip:</p> Stable releases 
Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tflite\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tflite\n</code></pre> </li> </ol>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#importing-and-compiling","title":"Importing and Compiling","text":"<p>IREE's tooling is divided into two components: import and compilation.</p> <ol> <li>The import tool converts the TFLite FlatBuffer to an IREE compatible form,   validating that only IREE compatible operations remain. The imported form contains a combination   of TOSA and IREE operations.</li> <li>The compilation stage generates the bytecode module for a list of targets,   which can be executed by IREE.</li> </ol>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-command-line-tools","title":"Using Command Line Tools","text":"<p>These two stages can be completed entirely via the command line.</p> <pre><code>WORKDIR=\"/tmp/workdir\"\nTFLITE_URL=\"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\nTFLITE_PATH=${WORKDIR}/model.tflite\nIMPORT_PATH=${WORKDIR}/tosa.mlir\nMODULE_PATH=${WORKDIR}/module.vmfb\n\n# Fetch the sample model\nwget ${TFLITE_URL} -O ${TFLITE_PATH}\n\n# Import the sample model to an IREE compatible form\niree-import-tflite ${TFLITE_PATH} -o ${IMPORT_PATH}\n\n# Compile for the CPU backend\niree-compile \\\n    --iree-input-type=tosa \\\n    --iree-hal-target-backends=llvm-cpu \\\n    ${IMPORT_PATH} \\\n    -o ${MODULE_PATH}\n</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-the-python-api","title":"Using the Python API","text":"<p>The example below demonstrates downloading, 
compiling, and executing a TFLite model using the Python API. This includes some initial setup to declare global variables, download the sample module, and download the sample inputs.</p> <p>Declaration of absolute paths for the sample repo and import all required libraries. The default setup uses the CPU backend as the only target. This can be reconfigured to select alternative targets.</p> <pre><code>import iree.compiler.tflite as iree_tflite_compile\nimport iree.runtime as iree_rt\nimport numpy\nimport os\nimport urllib.request\n\nfrom PIL import Image\n\nworkdir = \"/tmp/workdir\"\nos.makedirs(workdir, exist_ok=True)\n\ntfliteFile = \"/\".join([workdir, \"model.tflite\"])\njpgFile = \"/\".join([workdir, \"input.jpg\"])\ntfliteIR = \"/\".join([workdir, \"tflite.mlir\"])\ntosaIR = \"/\".join([workdir, \"tosa.mlir\"])\nbytecodeModule = \"/\".join([workdir, \"iree.vmfb\"])\n\nbackends = [\"llvm-cpu\"]\nconfig = \"local-task\"\n</code></pre> <p>The TFLite sample model and input are downloaded locally.</p> <pre><code>tfliteUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\njpgUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8_input.jpg\"\n\nurllib.request.urlretrieve(tfliteUrl, tfliteFile)\nurllib.request.urlretrieve(jpgUrl, jpgFile)\n</code></pre> <p>Once downloaded we can compile the model for the selected backends. Both the TFLite and TOSA representations of the model are saved for debugging purposes. 
This is optional and can be omitted.</p> <pre><code>iree_tflite_compile.compile_file(\n  tfliteFile,\n  input_type=\"tosa\",\n  output_file=bytecodeModule,\n  save_temp_tfl_input=tfliteIR,\n  save_temp_iree_input=tosaIR,\n  target_backends=backends,\n  import_only=False)\n</code></pre> <p>After compilation is completed we configure the VmModule using the local-task configuration and compiled IREE module.</p> <pre><code>config = iree_rt.Config(\"local-task\")\ncontext = iree_rt.SystemContext(config=config)\nwith open(bytecodeModule, 'rb') as f:\n  vm_module = iree_rt.VmModule.from_flatbuffer(config.vm_instance, f.read())\n  context.add_vm_module(vm_module)\n</code></pre> <p>Finally, the IREE module is loaded and ready for execution. Here we load the sample image, manipulate to the expected input size, and execute the module. By default TFLite models include a single function named 'main'. The final results are printed.</p> <pre><code>im = numpy.array(Image.open(jpgFile).resize((192, 192))).reshape((1, 192, 192, 3))\nargs = [im]\n\ninvoke = context.modules.module[\"main\"]\niree_results = invoke(*args)\nprint(iree_results)\n</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#samples","title":"Samples","text":"<ul> <li> <p>The tflitehub folder in the iree-experimental repository contains test scripts to compile, run, and compare various TensorFlow Lite models sourced from TensorFlow Hub.</p> </li> <li> <p>An example smoke test of the TensorFlow Lite C API is available here.</p> </li> </ul> Colab notebooks Text classification with TFLite and IREE","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#troubleshooting","title":"Troubleshooting","text":"<p>Failures during the import step usually indicate a failure to lower from TensorFlow Lite's operations to TOSA, the intermediate representation used by IREE. Many TensorFlow Lite operations are not fully supported, particularly those that use dynamic shapes. 
Please reach out on one of IREE's communication channels if you notice something missing.</p>","tags":["Python","TensorFlow"]},{"location":"reference/","title":"Reference pages","text":""},{"location":"reference/#api-bindings","title":"API bindings","text":"<p>IREE offers API bindings for compiling and running programs from various languages.</p> <ul> <li>Index page</li> </ul>"},{"location":"reference/#mlir-dialects","title":"MLIR dialects","text":"<p>Automatically generated documentation for the MLIR dialects defined in the IREE repository.</p> <ul> <li>Index page</li> </ul>"},{"location":"reference/#other-topics","title":"Other topics","text":"<ul> <li>Glossary</li> <li>Optimization options</li> <li>Extensions</li> </ul>"},{"location":"reference/extensions/","title":"Extension mechanisms","text":"<p>Note</p> <p>Much of this describes provisions for extension within IREE but until the core of the system has settled little work will be done to fully flesh-out and document them in detail. A large majority of things that would make someone want to extend IREE can instead be accomplished much easier and performantly using native MLIR dialects that are then processed by the IREE compiler.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#guidelines","title":"Guidelines","text":"<p>IREE has a compiler and runtime separation, a multi-layered architecture, and split between execution of \"host code\" that schedules compute-heavy work and SPMD \"device code\" that performs the bulk of compute operations. Each axis has a different set of extension mechanisms that can be used independently or combined.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#extension-philosophy","title":"Extension philosophy","text":"<p>Organized below are some of the mechanisms IREE provides for extending the core compiler and runtime and when they should(n't) be used. 
The goal of these progressively lower-level extension mechanisms is to make it easier for users to fall into the pit of success:</p> <p>Quote</p> <p>\"a well-designed system makes it easy to do the right things and annoying (but not impossible) to do the wrong things.\" - Jeff Atwood</p> <p>The amount of engineering complexity for initial bring-up and maintenance increases with each subsequently lower-level approach and it is best to start from the top and exit as fast as possible: this is a choose-your-own-adventure where you're trying to escape the dungeon with both the loot and your limbs . Avoid the temptation of immediately dropping down to making external C calls at runtime because that's how it's been done before as it's easier, more robust, and more performant to use the system as it is intended to be used.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-extend","title":"When to extend","text":"<p>The primary goal when extending any framework should first be to avoid extending it at all. There is no mechanism that is free - whether in terms of engineering effort to develop and maintain over time, include in compiler deployments, or include in runtime deployments. As a system scales in deployment configurations the available mechanisms for extension increase but so too does the chaos introduced by extensions that do not also scale with that design. 
Users are the only ones who can determine the tradeoffs they are willing to accept: for example, the mechanism to extend device code with a custom runtime call to a C function does not work on GPUs and gets significantly more complicated on CPUs as sandboxes/enclaves are used - but if the user scenario is for local process CPU-only execution that may not matter.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-inputscompilerruntime","title":"Where to extend (inputs/compiler/runtime)","text":"<p>Consider in normal software development when one would choose to write more code (possibly packaging it into a reusable library) vs. changing the programming language or compiler they are using to compile their code vs. changing the operating systems their code runs on. The further one gets from the problem they are trying to solve the more work, coordination, and maintenance is involved and though there are reasons to make changes across the stack they should be done only when a simpler solution would not suffice.</p> <p>An author will retain more control over their logic the closer they sit to the inputs to the compiler. IREE provides several mechanisms that try to keep control with the author and robust to changes in IREE or MLIR internals and it is strongly encouraged that those looking to extend take those routes first. Contributions that help everyone are very welcome but do have a higher cost and it's often much easier to design and justify upstream changes with working examples in forks or at higher levels of the stack.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-hostdevice","title":"Where to extend (host/device)","text":"<p>From a performance perspective the rule is to colocate code with the data it is acting on: tensor data, for example, should almost exclusively be manipulated by device code as tensors live on device. 
Attempting to use tensor data with host code will result in synchronization points and host/device transfers that can decimate performance. This can lead to seemingly paradoxical situations where swapping out compiler-generated code for a human-authored \"fast path\" can be slower than even the most naive compiler results. An important thing to keep in mind with compilers is that it is exceedingly difficult to produce code by hand that is consistently more performant across a broad range of deployments and the first temptation should always be to improve the compiler - extending it via other mechanisms when not required by the task is often just premature optimization.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#1-target-iree-input-dialects","title":"1. Target IREE input dialects","text":"<p>TL;DR</p> <p>Convert your custom ops into standard MLIR dialects.</p> <pre><code>+------------+      +--------+      +---------------+\n| Your input | -+-&gt; |  iree  | -+-&gt; | IREE compiler |\n+------------+  |   +--------+  |   +---------------+\n                |   +--------+  |\n                +-&gt; | linalg | -+\n                |   +--------+  |\n                |      ....     |\n</code></pre> <p>The easiest, cleanest, and most robust path to extend IREE is to make use of what MLIR is designed for: composing dialects and converting between them. IREE supports several input dialects such as <code>tosa</code>, <code>mhlo</code>, <code>linalg</code>, and the standard <code>arith</code>, <code>math</code>, <code>tensor</code>, and <code>scf</code> dialects. Any source IR that can be turned into that mix of dialects (directly or transitively) will work with the whole IREE pipeline for all deployment configurations and targets. 
If possible to express the computation in this form it will always be the best route to getting small deployments without the need to modify or include any additional code at runtime and run on all device types and execution modes.</p> <p>This mechanism can also be layered with any of the subsequent lower-level ones: if some part of the operation runs on the host and some part on device then decomposing it such that it contains as many standard ops for flow control as possible and linear algebra/custom ops for the dense math will reduce the engineering effort required on both sides and lead to an easier to maintain solution even if lower-level extension is required.</p> <p>A large majority of classic ML \"custom ops\" can be accomplished with this approach. When bringing up projects built on IREE it's best to concisely describe the operation in more elemental mathematical representations and then add optimizations where required knowing that things will still work even if those optimizations never happen.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros","title":"Pros","text":"<ul> <li>No IREE compiler or runtime code changes required.<ul> <li>Can use standard IREE packaged releases and tools.</li> <li>No versioning issues at runtime.</li> </ul> </li> <li>IREE's host/device partitioning can partition your code.</li> <li>Fusion and other compiler techniques (CSE/DCE/inlining/etc) work on your code.</li> <li>All target backends (CPU/GPU/accelerators/enclaves/etc) work.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons","title":"Cons","text":"<ul> <li>Input dialects cannot natively represent all possible programs (such as file   IO and other syscalls).</li> <li>Performance-sensitive host code (b-trees and other in-memory databases) will   run through the slower VM paths if not authored as dense compute.</li> 
</ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use","title":"When to use","text":"<ul> <li> Targeting multiple MLIR toolchains of which IREE is just   one (as little to no IREE-specific code is required).</li> <li> Operation represents host code in addition to device code.</li> <li> All code is known statically or symbolically at   compile-time (instead of independently versioned libraries at runtime).</li> <li> Complex high-performance code not representable as linear algebra.</li> <li> External runtime interactions (file/network/user IO). Use   custom modules.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation","title":"Implementation","text":"<p>To make use of this approach one just needs to follow the standard MLIR dialect conversion behavior: add a dialect with ops, add a conversion pass, and run that pass before providing the resulting IR to the IREE compiler. See Creating a Dialect.</p> <p>Think of this like authoring C++ sources with templates that you compile into your application: Clang (and LLVM beyond) don't know about your library details and instead just process it as it would any other code. You can take the same source and pass it to GCC and it'll be robust to underlying changes in the system.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#2-extend-host-code-with-custom-modules","title":"2. 
Extend host code with custom modules","text":"<p>TL;DR</p> <p>Import MLIR functions in the compiler and custom modules at runtime.</p> <pre><code>// Main user module compiled by IREE:\nmodule @model {\n  // Declare a synchronous external function:\n  func.func private @my_custom_module.sync_func(%input: tensor&lt;?xf32&gt;) -&gt; i32\n  // Declare an asynchronous external function:\n  func.func private @my_custom_module.async_func(%input: tensor&lt;?xf32&gt;) -&gt; tensor&lt;?xf32&gt; attributes {\n    iree.abi.model = \"coarse-fences\",\n    nosideeffects\n  }\n  func.func @predict() {\n    ...\n    // Call a synchronous/blocking external function:\n    %sync_result = call @my_custom_module.sync_func(%sync_input) : (tensor&lt;?xf32&gt;) -&gt; i32\n    ...\n    ...\n    // Call an asynchronous/non-blocking external function:\n    %async_result = call @my_custom_module.async_func(%async_input) : (tensor&lt;?xf32&gt;) -&gt; tensor&lt;?xf32&gt;\n    ...\n  }\n}\n</code></pre> <p>IREE provides dynamic linking at runtime via its VM interfaces. For code that runs on the host and requires syscalls or calling out to existing libraries - such as file IO, text processing, and JPEG decoding - this is an easy way to interop without paying attention to the more complex details of device code. An IREE module compiled using custom modules is portable and dynamically deployable so long as the custom module is registered at runtime.</p> <p>This approach conceptually matches what normal native binaries do in an OS: imports are declared and at runtime they are resolved based on the available exports of modules in the system. Just as with normal systems engineering design of the API between modules is up to the user and depending on rigor can have several pitfalls but these problems and their solutions are not IREE specific and anyone who has designed a shared library interface can apply the same rules here in IREE around versioning, performance, etc. 
One does not add 2 integers via a syscall and the same holds here: custom modules and the functions within should perform a large amount of work to hide overheads involved in the cross-module calls and users must be aware that the compiler cannot optimize across the call boundaries.</p> <p>See the synchronous tensor I/O and asynchronous tensor I/O samples.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_1","title":"Pros","text":"<ul> <li>No IREE compiler code changes required.</li> <li>Produced artifacts are portable across IREE deployment configurations.</li> <li>Full system access is allowed - the VM just calls external functions.</li> <li>Runtime modules can be implemented (via shims) in other languages/runtimes.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_1","title":"Cons","text":"<ul> <li>Custom modules must be registered at runtime by the user.</li> <li>The VM custom module ABI goo must be authored by the user (such as with JNI or   pybind to move between java/python and C).</li> <li>All custom module code must be compiled and deployed regardless of how much   any modules use. 
The granularity of modules and their versioning is up to the   user.</li> <li>Custom module code cannot be optimized by the IREE compiler to avoid   host/device readbacks and unnecessary data type conversion.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_1","title":"When to use","text":"<ul> <li> Interactions with large libraries or system calls.</li> <li> Performance-sensitive host code that cannot easily be   represented as device code (like UTF-8 string transformation using libicu).</li> <li> Extensively using tensor resources.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_1","title":"Implementation","text":"<p>The runtime portion requires that the code be exported to the VM system by way of an <code>iree_vm_module_t</code> interface. A low-level native interface exists with minimal overhead and is used for example by the IREE HAL itself. There is also a C++ wrapper that is significantly easier to work with however it needs some performance improvements.</p> <p>Full end-to-end examples can be found under <code>samples/custom_modules/</code>:</p> <ul> <li>The basic sample shows how to add VM modules with custom types and take advantage of ABI features like fallback functions and optional imports.</li> <li>The synchronous tensor I/O sample shows a call taking and returning a tensor and performing blocking work.</li> <li>The asynchronous tensor I/O sample shows the same thing but with fences for asynchronous scheduling.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#3-extend-target-specific-device-conversion-patterns","title":"3. 
Extend target-specific device conversion patterns","text":"<p>TL;DR</p> <p>Add patterns to <code>iree/Compiler/Codegen/</code> to emit target code.</p> <p>The easiest and most robust path for specializations of device code is to emit such code mixed with the IREE compiler generated code at the highest possible level of abstraction within the target pipeline. For example, if the code can be represented with the <code>vector</code> dialect then inserting conversion patterns between <code>linalg</code> and <code>vector</code> enables the emitted code to be specialized further based on user configuration and optimized with the full set of available passes that run in the pipeline. For each level lower one goes the more flexibility they gain such as being able to emit inline assembly blocks that do anything while trading off generality and multi-targeting applicability.</p> <p>How much the tradeoff matters is based on the behavior of the extension. If a pattern changing a transcendental function to an approximation can operate at the vector level then all IREE deployment targets can benefit from the pattern and as new targets are made available they will automatically receive the benefits. In contrast, a pattern at the vector level that turns generic vector operations into architecture-specific LLVM intrinsics by its nature only pertains to a single target family and can be done at a lower level. As a rule of thumb if a particular pattern is going to need ~N implementations for ~N targets that are all mostly the same it's better to try to move that higher in the stack.</p> <p>At this point the complexity of extending things is still fairly constrained: a C++ pass or pattern is verified with normal lit tests and can be upstreamed easily either into MLIR or IREE (a large number of IREE patterns are upstreamed, benefiting all users of MLIR). 
Cross-compilation and versioning are not a factor and the IREE artifacts can be considered durable at a coarse level (outside of major target architectural changes).</p> <p>Note that depending on the target there are various mechanisms for representing code in MLIR, up to and including inline assembly snippets in IR via <code>llvm.inline_asm</code>.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_2","title":"Pros","text":"<ul> <li>Not limited to what is possible to represent in any particular MLIR dialect.</li> <li>Rich target configuration available; multiple passes can contribute info.</li> <li>Produced executable binaries are hermetic and no runtime changes are required.</li> <li>Specialization can happen in MLIR dialects like <code>linalg</code> or <code>vector</code> as well   as target-specific representations like SPIR-V and LLVM IR.</li> <li>The compiler can perform deep optimizations across both the generated code and   the provided code (hoisting/loop invariant code motion/cse/etc).</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_2","title":"Cons","text":"<ul> <li>Requires implementing the patterns as code in the IREE compiler or via TBD   interfaces.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_2","title":"When to use","text":"<ul> <li> Code that must be emitted during target lowering - such as   something optimizing for a particular CPU architecture.</li> <li> Hot code mixed with generated code at a fine granularity   (within the innermost loop).</li> <li> External existing hand-authored libraries. Either statically   or dynamically link instead.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_2","title":"Implementation","text":"<p>There are several ways to author patterns and passes in MLIR. 
As examples:</p> <ul> <li>A majority of patterns are authored in C++ using PatternRewriter.</li> <li>PDL is an MLIR-based way to   express rewrite operations with strong typing, compile-time verification, and   easily-readable and less-verbose IR.</li> <li><code>linalg</code> uses a python-based DSL   for defining some of its extended ops.</li> </ul> <p>There are many examples within both MLIR and IREE, one specifically being the polynomial approximation expansion patterns.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#4-include-external-target-specific-device-code","title":"4. Include external target-specific device code","text":"<p>TL;DR</p> <p>Statically link external object files into IREE executables.</p> <p>For large bodies of existing device code or library calls that are available for static linkage the work involved to reimplement them at higher levels of the stack can be cost prohibitive even if it leads to better results. In these cases just as with a normal toolchain one would just want to declare an external function, call it, and add the object file to the linker command line. In IREE the same can be performed by way of taking compatible bitcode or native object files and linking them in with the generated code. An MLIR pattern would declare and emit the call and the target-specific IREE linker would pull in the objects.</p> <p>As the linking behavior varies per target (for example, some targets like SPIR-V don't have traditional linkers) how this is performed is up to the IREE target backends. The complexity involved in producing the object files to link will also vary per-backend and the complexity of the deployment: cross-compiling for multiple architectures or compilation modes (ASAN, etc) will require unique copies of the object files matching that precise configuration.</p> <p>At this point generality is largely out as is the ability to cleanly upstream such files. 
It should be apparent how a few dozen lines of C++ or PDL that avoids the need for any of this complexity is more appealing. In extremely specific cases of a single platform/architecture/version for a single program deployed via a specific artifact composition it's not so bad but IREE is designed such that extreme specificity is an optional mode of the more general solution. This does not mean this mechanism is not useful in some situations and only that it should be a last-resort when one of the easier to manage solutions is not viable - not a shortcut to avoid writing some C++ patterns.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_3","title":"Pros","text":"<ul> <li>Works with hand-authored code in compatible object files from any toolchain.</li> <li>No IREE runtime changes required.<ul> <li>All deployment modes still work, including multi-targeting.</li> <li>No versioning concerns as custom code is included in artifacts.</li> </ul> </li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_3","title":"Cons","text":"<ul> <li>Users must provide per-target precompiled object files on disk.</li> <li>IREE compiler changes are still needed for generating the external calls.</li> <li>Though LTO may be able to optimize across the calls it is not guaranteed.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_3","title":"When to use","text":"<ul> <li> Existing math libraries or architecture-specific functions   that cannot be ported into a more MLIR-friendly form.</li> <li> Mixing in hand-authored code written in C/rust/etc with   generated code from MLIR.</li> <li> External code can be represented as either <code>linalg</code>,   <code>vector</code>, or LLVM IR. Use target-specific conversion patterns instead.</li> <li> External code size is large and unlikely to benefit from   link-time optimizations (such as something like libjpeg). 
Dynamically link   instead.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_3","title":"Implementation","text":"<p>As the linking behavior varies per target backend there is no general solution at this level: if targeting the CPU then the system native linker or lld need to be provided the object files, while SPIR-V will need to merge the SPIR-V binaries directly, and Metal shader libraries will need to be constructed with the Apple-specific <code>metallib</code> tooling. Producing these files and performing the linking is outside the scope of IREE.</p> <p>If the files can be acquired then compiler changes will be required to emit calls to them and invoke the linker with the files.</p> <p>On the CPU an alternative is to use the static library output mode where IREE produces an object file and then the user invokes the linker themselves; this still requires the compiler changes to emit the calls but avoids needing to teach the compiler how to link the files.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#5-dynamically-link-target-specific-device-code-cpu-only","title":"5. Dynamically link target-specific device code (CPU only)","text":"<p>TL;DR</p> <p>Dynamically link external C functions at runtime from device code.</p> <p>It is pitch black. You are likely to be eaten by a grue.</p> <p>This is the lowest-level integration in the system and is designed to act as an escape hatch and - as with any emergency escape hatch - it's not designed for ergonomics. Users should try first to come in through the door and attempting to use this mechanism should trigger alarms about the approach being attempted.</p> <p>IREE's execution model for device code and native machine binary deployment mechanisms are designed with several constraints in order to make all of the above approaches possible and performant. 
Calling arbitrary C functions from deep within the system can introduce subtle (and not-so-subtle) bugs that are extremely difficult to track down and versioning between the compiler emitting the calls and the runtime providing the implementations can cause skew unless held carefully. Consider the methods added here like syscalls in that they must be extremely focused and if they are ever likely to change (including being removed) then care will be needed just as with versioning or redirecting a syscall. Designing good stable interfaces is hard and a classic pit of failure.</p> <p>Some things to note:</p> <ul> <li>Device code executes in a tiled fashion and single dispatches may invoke the   same function many times from many threads concurrently to perform   the larger work.</li> <li>Tiles may execute in any order and on any thread; performing fine-grained   locking within the tile can lead to deadlocks.</li> <li>Device code is stateless in order to allow for access restrictions and caching   across multiple loaded models - any library state required must be externally   managed via process globals.</li> <li>Device code may be running out-of-process (sandbox/enclave) and the library   functions must be available where the dispatches run and not where they are   launched (such as being linked into the sandbox binary, if separate from the   main process binary).</li> <li>The stack must be used to pass arguments/results to external calls via a   single pointer and there is no libffi-like functionality for magically calling   arbitrary C functions. 
Users must provide the shims they need.</li> <li>Thread-local storage is unavailable in the called code (it may be usable, but   it is not guaranteed it'll work on all platforms and leaks are likely).</li> <li>No heap allocator is provided and the use of libc malloc is unsupported.</li> </ul> <p>Most of the constraints here come from the SPMD parallelism model, platform-agnostic deployment format, and overall data-oriented design of IREE. Code operating in this fashion has a certain shape and that is usually not the same as big legacy single-threaded CPU-focused BLAS libraries that perform their own caching, internal thread and state management, and other shenanigans. IREE is not designed to wrap such things and if any of these notes are issues it is more an indicator that the approach needs adjustment than anything else. Trying to bypass or workaround the constraints is possible - after all IREE is an open source project and any user is welcome to fork it - but unsupported by the core IREE team.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_4","title":"Pros","text":"<ul> <li>Function resolution at runtime is orthogonal to compiler target specification.</li> <li>Machine code can be shared between the application and IREE artifacts.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_4","title":"Cons","text":"<ul> <li>IREE compiler and runtime must both be modified.</li> <li>Deeper integration with the IREE codegen compiler infrastructure required.</li> <li>ABI versioning complexity between compiler and runtime.</li> <li>Runtimes must ship the imports for the lifetime of any artifact compiled to   use them.<ul> <li>Humans are bad at predicting the future.</li> <li>Using the same artifact in different binaries at runtime requires changes   to each binary - including those that may not be owned by the person   producing the artifact.</li> <li>Weak imports and conditional usage can help but 
still lead to bloat.</li> </ul> </li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_4","title":"When to use","text":"<ul> <li> Calling into opaque closed-source BLAS-like microkernel   libraries.</li> <li> Any other cases covered above can be used, especially   microkernels that can be represented in MLIR or as statically linked   libraries.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_4","title":"Implementation","text":"<p>The compiler is changed to produce calls to imports via a dynamic import table provided to each dispatch function. The import table is declared in the executable library for use at runtime. Runtime applications register an import provider to resolve named symbols in the import table to C functions that marshal arguments and results.</p> <p>The compiler-side needs some additional work but an example is included here: Issue 7504. The runtime-side is complete and resolution is performed by a user-supplied <code>iree_hal_executable_import_provider_t</code>.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/","title":"Glossary","text":"<p>IREE exists in an ecosystem of projects and acts as a bridge between machine learning frameworks and a variety of hardware platforms. This glossary outlines some of those projects and technologies.</p> <p>Something missing?</p> <p>Don't see a project or technology here that you think should be? 
We welcome contributions on our GitHub page!</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#jax","title":"JAX","text":"<p>JAX is a Python framework supporting high-performance machine learning research by bridging automatic differentiation and ML compilers like XLA and IREE.</p> <p>See the JAX Integration guide for details on how to use JAX programs with IREE.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#mlir","title":"MLIR","text":"<p>Multi-Level Intermediate Representation (MLIR) is the compiler framework that IREE is built around. Beyond the tooling this includes a set of common dialects and transformations that IREE utilizes for its code generation system.</p> <p>For general discussion on MLIR see the project's discourse forum.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#linalg","title":"Linalg","text":"<p>Linalg is an MLIR dialect that defines Linear Algebra operations in a generalized fashion by modeling iteration spaces together with compute payloads. Linalg includes a set of commonly used operations as well as generic interfaces.</p> <p>IREE uses the Linalg dialect during its code generation pipeline to define tensor operations then generate loop structures for its various backend targets.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#openxla","title":"OpenXLA","text":"<p>OpenXLA is a community-driven, open source ML compiler ecosystem.</p> <p>IREE interfaces with some of the OpenXLA projects, such as StableHLO.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#pytorch","title":"PyTorch","text":"<p>PyTorch is an optimized tensor library for deep learning.</p> <p>PyTorch uses the Torch-MLIR project to interface with projects like IREE. 
See the PyTorch Integration guide for details on how to use PyTorch programs with IREE.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#spir-v","title":"SPIR-V","text":"<p>SPIR-V is a shader and kernel intermediate language for expressing parallel computation typically used for GPUs. It serves as a hardware agnostic assembly format for distributing complex, computationally intensive programs.</p> <p>IREE uses the SPIR-V MLIR Dialect in its code generation pipeline for Vulkan and other compute APIs.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#stablehlo","title":"StableHLO","text":"<p>StableHLO is a set of versioned high-level operations (HLOs) for ML models with backward and forward compatibility guarantees. StableHLO aims to improve interoperability between frameworks (such as TensorFlow, JAX, and PyTorch) and ML compilers.</p> <p>StableHLO has both a specification and an MLIR dialect.</p> <p>IREE uses the StableHLO MLIR Dialect as one of its input formats.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tosa","title":"TOSA","text":"<p>Tensor Operator Set Architecture (TOSA) provides a set of tensor operations commonly employed by Deep Neural Networks. TOSA defines accuracy and compatibility constraints so frameworks that use it can trust that applications will produce similar results on a variety of hardware targets.</p> <p>TOSA has both a specification and an MLIR dialect.</p> <p>IREE uses the TOSA MLIR dialect as one of its input formats.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tflite","title":"TFLite","text":"<p>TensorFlow Lite (TFLite) is a library for deploying models on mobile and other edge devices.</p> <p>IREE supports running TFLite programs that have been imported into MLIR using the TOSA dialect. 
See the TFLite Integration guide for details on how to use TFLite programs with IREE.</p> <p>IREE also has bindings for the TFLite C API, see the <code>runtime/bindings/tflite/</code> directory for details.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/optimization-options/","title":"Optimization options","text":"<p>This page documents various supported flags for optimizing IREE programs. Each is presented with its English name, flag to enable/disable, and default state.</p> <p>These flags can be passed to the:</p> <ul> <li><code>iree-compile</code> command line tool</li> <li><code>extra_args=[\"--flag\"]</code> argument to <code>iree.compiler.tools</code> Python wrappers</li> <li>In-process Python compiler API   <code>iree.compiler.transforms.iree-compile.CompilerOptions(\"--flag\", \"--flag2\")</code>   constructor</li> <li><code>ireeCompilerOptionsSetFlags()</code> compiler C API function</li> </ul>"},{"location":"reference/optimization-options/#high-level-program-optimizations","title":"High level program optimizations","text":""},{"location":"reference/optimization-options/#constant-evaluation-iree-opt-const-eval-on","title":"Constant evaluation (<code>--iree-opt-const-eval</code> (on))","text":"<p>Performs compile-time evaluation of any global initializers which produce the initial values for global constants, storing the global directly in the program as constant data. This extracts such constant program fragments and recursively compiles them, using the runtime to evaluate the results.</p> <p>Note that this only has any effect on computations in module initializer functions, not free-standing operations in the program which may produce constant-derived results. 
See <code>--iree-opt-const-expr-hoisting</code> for options to optimize these.</p>"},{"location":"reference/optimization-options/#constant-expression-hoisting-iree-opt-const-expr-hoisting-off","title":"Constant expression hoisting (<code>--iree-opt-const-expr-hoisting</code> (off))","text":"<p>Identifies all trees of constant expressions in the program and uses a heuristic to determine which would be profitable to hoist into global initializers for evaluation at module load. Together with <code>--iree-opt-const-eval</code>, this will convert eligible trees of expressions to purely static data embedded in the module.</p> <p>The heuristic is currently relatively primitive, using static information to disable hoisting of leaf operations which are metadata only (i.e. broadcasts, etc) or are expected to fold away as part of operator fusion. Notably, the current heuristic is likely to pessimize module size in the case of complicated programs with trees of constant, large tensors.</p>"},{"location":"reference/optimization-options/#numeric-precision-reduction-iree-opt-numeric-precision-reduction-off","title":"Numeric precision reduction (<code>--iree-opt-numeric-precision-reduction</code> (off))","text":"<p>Analyzes program constant data and program flow to identify math operations which can be safely evaluated with reduced precision (currently with a minimum of 8bit integers but being extended to infer any bit depth) and inserts appropriate casts. 
In conjunction with Constant Expression Hoisting, Constant Evaluation and other automatic optimizations, this can produce programs where large amounts (up to the whole) have had their numeric operations and constant data rewritten to lower precision types.</p> <p>This feature is actively evolving and will be the subject of dedicated documentation when ready.</p>"},{"location":"reference/optimization-options/#strip-debug-assertions-iree-opt-strip-assertions-off","title":"Strip Debug Assertions (<code>--iree-opt-strip-assertions</code> (off))","text":"<p>Strips all <code>std.assert</code> ops in the input program after useful information for optimization analysis has been extracted. Assertions provide useful user-visible error messages but can prevent critical optimizations. Assertions are not, however, a substitution for control flow and frontends that want to check errors in optimized release builds should do so via actual code - similar to when one would <code>if (foo) return false;</code> vs. <code>assert(foo);</code> in a normal program.</p>"},{"location":"reference/bindings/","title":"API bindings","text":"<p>API bindings allow for programmatic use of IREE's compiler and runtime components. The core IREE project is written in C<sup>1</sup>, allowing for API bindings to be written in a variety of other languages.</p> <p>Something missing?</p> <p>Want to use another language? Looking for something specific out of one of those already listed?</p> <p>We welcome discussions on our communication channels and contributions on our GitHub page!</p>"},{"location":"reference/bindings/#official-api-bindings","title":"Official API bindings","text":"<p>Members of the core project team and other partner groups maintain these official bindings:</p> Language Compiler API? Runtime API? Published packages? 
C/C++  Supported  Supported  Unsupported Python  Supported  Supported  Supported"},{"location":"reference/bindings/#cc","title":"C/C++","text":"<p>See the C API reference page.</p>"},{"location":"reference/bindings/#python","title":"Python","text":"<p>See the Python reference page.</p>"},{"location":"reference/bindings/#unofficial-and-experimental-api-bindings","title":"Unofficial and experimental API bindings","text":"<p>Members of our developer community have authored bindings using other languages:</p> Language Compiler API? Runtime API? Published packages? JavaScript  Experimental  Experimental  Unsupported Java  Unsupported  Experimental  Unsupported Julia  Experimental  Experimental  Unsupported Rust  Unsupported  Experimental  Experimental"},{"location":"reference/bindings/#javascript","title":"JavaScript","text":"<ul> <li>JavaScript bindings for WebAssembly and WebGPU are under development in IREE's <code>experimental/web/</code> directory.</li> </ul>"},{"location":"reference/bindings/#java","title":"Java","text":"<ul> <li>Java TFLite bindings were developed at one point in IREE's <code>runtime/bindings/tflite/java</code> directory.</li> </ul>"},{"location":"reference/bindings/#julia","title":"Julia","text":"<ul> <li>Coil.jl is an experimental package to lower and execute Julia tensor operations to IREE.</li> </ul>"},{"location":"reference/bindings/#rust","title":"Rust","text":"<ul> <li>iree-rs is a crate containing rustic bindings for the IREE runtime.</li> </ul> <ol> <li> <p>with some C++ tools and utilities\u00a0\u21a9</p> </li> </ol>"},{"location":"reference/bindings/c-api/","title":"C API bindings","text":""},{"location":"reference/bindings/c-api/#overview","title":"Overview","text":"<p>The IREE compiler and IREE runtime both have their own C/C++ APIs. 
This page introduces the available APIs and describes how to use them from your applications.</p> <p>Note</p> <p>There are multiple ways to distribute and depend on C/C++ projects, each with varying levels of portability, flexibility, and toolchain compatibility. IREE aims to support common configurations and platforms.</p>"},{"location":"reference/bindings/c-api/#compiler-api","title":"Compiler API","text":"<p>The IREE compiler is structured as a monolithic shared object with a dynamic plugin system allowing for extensions. The shared object exports symbols for versioned API functions.</p> <pre><code>graph TD\n  accTitle: IREE compiler linkage model diagram\n  accDescr {\n    The libIREECompiler.so or IREECompiler.dll shared object contains pipelines,\n    target backends, and general passes as private implementation details.\n    Compiler plugins interface with the compiler shared object to extend it with\n    custom targets, dialects, etc.\n    Applications interface with the compiler shared object through the compiler\n    C API's exported symbols.\n  }\n\n  subgraph compiler[libIREECompiler.so / IREECompiler.dll]\n    pipelines(\"Pipelines\n\n    \u2022 Flow\n    \u2022 Stream\n    \u2022 etc.\")\n\n    targets(\"Target backends\n\n    \u2022 llvm-cpu\n    \u2022 vulkan-spirv\n    \u2022 etc.\")\n\n    passes(\"General passes\n\n    \u2022 Const eval\n    \u2022 DCE\n    \u2022 etc.\")\n  end\n\n  plugins(\"Compiler plugins\n\n    \u2022 Custom targets\n    \u2022 Custom dialects\n    \u2022 etc.\")\n\n  application(Your application)\n\n  compiler &lt;-- \"Plugin API&lt;br&gt;(static or dynamic linking)\" --&gt; plugins\n  compiler -. 
\"Compiler C API&lt;br&gt;(exported symbols)\" .-&gt; application</code></pre> <p>API definitions can be found in the following locations:</p> Source location Overview <code>iree/compiler/embedding_api.h</code> Top-level IREE compiler embedding API <code>iree/compiler/PluginAPI/</code> directory IREE compiler plugin API <code>mlir/include/mlir-c/</code> directory MLIR C API headers"},{"location":"reference/bindings/c-api/#concepts","title":"Concepts","text":"<p>The compiler API is centered around running pipelines to translate inputs to artifacts. These are modeled via sessions, invocations, sources, and outputs.</p> <pre><code>stateDiagram-v2\n  accTitle: IREE compiler session and invocation state diagram\n  accDescr {\n    Input files are opened (or buffers are wrapped) as sources in a session.\n    Sources are parsed into invocations, which run pipelines.\n    Output files are written (or buffers are mapped) for compilation artifacts.\n    Sessions can contain multiple sources and run multiple invocations.\n  }\n\n  direction LR\n  InputFile --&gt; Source1 : open file\n  InputBuffer --&gt; Source2 : wrap buffer\n\n  state Session {\n    Source1 --&gt; Invocation1\n    Source2 --&gt; Invocation2\n    Invocation1 --&gt; Invocation1 : run pipeline\n    Invocation2 --&gt; Invocation2 : run pipeline\n  }\n\n  Invocation1 --&gt; Output1File   : write file\n  Invocation1 --&gt; Output1Buffer : map memory\n  Invocation2 --&gt; Output2Buffer : map memory</code></pre>"},{"location":"reference/bindings/c-api/#sessions","title":"Sessions","text":"<p>A session (<code>iree_compiler_session_t</code>) is a scope where one or more invocations can run.</p> <ul> <li>Internally, sessions consist of an <code>MLIRContext</code> and a private set of   options.</li> <li>Sessions may activate available plugins based on their options.</li> </ul>"},{"location":"reference/bindings/c-api/#invocations","title":"Invocations","text":"<p>An invocation (<code>iree_compiler_invocation_t</code>) 
is a discrete run of the compiler.</p> <ul> <li>Invocations run pipelines, consisting of passes, to translate from   sources to outputs.</li> </ul>"},{"location":"reference/bindings/c-api/#sources","title":"Sources","text":"<p>A source (<code>iree_compiler_source_t</code>) represents an input program, including operations and data.</p> <ul> <li>Sources may refer to files or buffers in memory.</li> </ul>"},{"location":"reference/bindings/c-api/#outputs","title":"Outputs","text":"<p>An output (<code>iree_compiler_output_t</code>) represents a compilation artifact.</p> <ul> <li>Outputs can be standalone files or more advanced streams.</li> </ul>"},{"location":"reference/bindings/c-api/#plugins","title":"Plugins","text":"<p>A plugin extends the compiler with some combination of target backends, options, passes, or pipelines. For documentation on compiler plugins, see <code>compiler/PluginAPI/README.md</code>.</p>"},{"location":"reference/bindings/c-api/#usage","title":"Usage","text":"<p>This snippet shows the general layout of the API. 
For working examples, see the samples below.</p> <p>To build a custom tool using the compiler API:</p> CMakeLists.txt<pre><code>set(_IREE_COMPILER_API \"${_IREE_COMPILER_ROOT}/bindings/c/iree/compiler\")\ntarget_include_directories(${_NAME} SYSTEM PRIVATE ${_IREE_COMPILER_API})\ntarget_link_libraries(${_NAME} iree_compiler_bindings_c_loader)\n</code></pre> iree_compiler_demo.c<pre><code>#include &lt;iree/compiler/embedding_api.h&gt;\n#include &lt;iree/compiler/loader.h&gt;\n\nint main(int argc, char** argv) {\n  // Load the compiler library then initialize it.\n  ireeCompilerLoadLibrary(\"libIREECompiler.so\");\n  ireeCompilerGlobalInitialize();\n\n  // Create a session to track compiler state and set flags.\n  iree_compiler_session_t *session = ireeCompilerSessionCreate();\n  ireeCompilerSessionSetFlags(session, argc, argv);\n\n  // Open a file as an input source to the compiler.\n  iree_compiler_source_t *source = NULL;\n  ireeCompilerSourceOpenFile(session, \"input.mlir\", &amp;source);\n\n  // Use an invocation to compile from the input source to one or more outputs.\n  iree_compiler_invocation_t *inv = ireeCompilerInvocationCreate(session);\n  ireeCompilerInvocationPipeline(inv, IREE_COMPILER_PIPELINE_STD);\n\n  // Output the compiled artifact to a file.\n  iree_compiler_output_t *output = NULL;\n  ireeCompilerOutputOpenFile(\"output.vmfb\", &amp;output);\n  ireeCompilerInvocationOutputVMBytecode(inv, output);\n\n  // Cleanup state.\n  ireeCompilerInvocationDestroy(inv);\n  ireeCompilerOutputDestroy(output);\n  ireeCompilerSourceDestroy(source);\n  ireeCompilerSessionDestroy(session);\n  ireeCompilerGlobalShutdown();\n}\n</code></pre>"},{"location":"reference/bindings/c-api/#samples","title":"Samples","text":"Project Source Description iree-org/iree-template-compiler-cmake <code>hello_compiler.c</code> Compiler application template iree-org/iree <code>integrations/pjrt/.../iree_compiler.cc</code> JIT for TensorFlow + JAX to IREE iree-org/iree 
<code>compiler/plugins</code> In-tree supported compiler plugins iree-org/iree <code>samples/compiler_plugins/</code> In-tree sample compiler plugins nod-ai/iree-amd-aie <code>plugins/.../iree-amd-aie</code> Early-phase plugins for interfacing with AMD AIE accelerators"},{"location":"reference/bindings/c-api/#runtime-api","title":"Runtime API","text":"<p>The IREE runtime is structured as a modular set of library components. Each component is designed to be linked into applications directly and compiled with LTO style optimizations.</p> <p>The low level library components can be used directly or through a higher level API.</p> High level APILow level API <p>The high level 'runtime' API sits on top of the low level components. It is relatively terse but does not expose the full flexibility of the underlying systems.</p> <pre><code>graph TD\n  accTitle: IREE runtime high level API diagram\n  accDescr {\n    The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n    their own types and API methods.\n    A high level \"runtime API\" sits on top of these component APIs.\n    Applications can interface indirectly with the IREE runtime via this\n    high level runtime API.\n  }\n\n  subgraph iree_runtime[IREE Runtime]\n    subgraph base\n      base_types(\"Types\n\n      \u2022 allocator\n      \u2022 status\n      \u2022 etc.\")\n    end\n\n    subgraph hal[HAL]\n      hal_types(\"Types\n\n      \u2022 buffer\n      \u2022 device\n      \u2022 etc.\")\n\n      hal_drivers(\"Drivers\n\n      \u2022 local-*\n      \u2022 vulkan\n      \u2022 etc.\")\n    end\n\n    subgraph vm[VM]\n      vm_types(\"Types\n\n      \u2022 context\n      \u2022 invocation\n      \u2022 etc.\")\n    end\n\n    runtime_api(\"Runtime API\n\n    \u2022 instance\n    \u2022 session\n    \u2022 call\")\n\n    base_types &amp; hal_types &amp; hal_drivers &amp; vm_types --&gt; runtime_api\n  end\n\n  application(Your application)\n\n  runtime_api --&gt; application</code></pre> 
<p>Each runtime component has its own low level API. The low level APIs are typically verbose as they expose the full flexibility of each underlying system.</p> <pre><code>graph TD\n  accTitle: IREE runtime low level API diagram\n  accDescr {\n    The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n    their own types and API methods.\n    Applications can interface directly with the IREE runtime via the low\n    level component APIs.\n  }\n\n  subgraph iree_runtime[IREE Runtime]\n    subgraph base\n      base_types(\"Types\n\n      \u2022 allocator\n      \u2022 status\n      \u2022 etc.\")\n    end\n    subgraph hal[HAL]\n      hal_types(\"Types\n\n      \u2022 buffer\n      \u2022 device\n      \u2022 etc.\")\n\n      hal_drivers(\"Drivers\n\n      \u2022 local-*\n      \u2022 vulkan\n      \u2022 etc.\")\n    end\n    subgraph vm[VM]\n      vm_types(\"Types\n\n      \u2022 context\n      \u2022 invocation\n      \u2022 etc.\")\n    end\n  end\n\n  application(Your application)\n\n  base_types &amp; hal_types &amp; hal_drivers &amp; vm_types --&gt; application</code></pre> <p>Runtime API header files are organized by component:</p> Component header file Overview <code>iree/runtime/api.h</code> High level runtime API <code>iree/base/api.h</code> Core API, type definitions, ownership policies, utilities <code>iree/vm/api.h</code> VM APIs: loading modules, I/O, calling functions <code>iree/hal/api.h</code> HAL APIs: device management, synchronization, accessing hardware features"},{"location":"reference/bindings/c-api/#high-level-concepts","title":"High level concepts","text":"<p>The high level API uses instances, sessions, and calls to run programs with a small API surface.</p> <pre><code>stateDiagram-v2\n  accTitle: IREE runtime high level API state diagram\n  accDescr {\n    Instances track sessions and state: options, drivers, devices.\n    Sessions track calls and state: a device and bytecode/VM modules.\n    Calls track input and output 
lists.\n  }\n\n  state iree_runtime_instance_t {\n    instance_state: state&lt;br&gt;- options&lt;br&gt;- drivers&lt;br&gt;- devices\n\n    state iree_runtime_session_t {\n      session_state: state&lt;br&gt;- device&lt;br&gt;- VM / bytecode modules\n      state iree_runtime_call_t  {\n        inputs\n        outputs\n      }\n    }\n  }</code></pre>"},{"location":"reference/bindings/c-api/#instance","title":"Instance","text":"<p>An instance (<code>iree_runtime_instance_t</code>) isolates runtime usage and manages device resources.</p> <ul> <li>Instances may service multiple sessions to avoid extra device interaction   and reuse caches/pools.</li> <li>Separate instances are isolated/sandboxed from one another.</li> </ul>"},{"location":"reference/bindings/c-api/#session","title":"Session","text":"<p>A session (<code>iree_runtime_session_t</code>) contains a set of loaded modules and their state.</p> <ul> <li>Sessions that share an instance may share resources directly.</li> <li>Sessions that do not share an instance can transfer resources using   import and export APIs.</li> </ul>"},{"location":"reference/bindings/c-api/#call","title":"Call","text":"<p>A call (<code>iree_runtime_call_t</code>) is a stateful VM function call builder.</p> <ul> <li>Calls can be reused to avoid having to construct input lists for each   invocation.</li> </ul>"},{"location":"reference/bindings/c-api/#low-level-concepts","title":"Low level concepts","text":""},{"location":"reference/bindings/c-api/#base","title":"Base","text":"<p>Under construction, more coming soon</p>"},{"location":"reference/bindings/c-api/#vm","title":"VM","text":"<p>IREE uses its own Virtual Machine (VM) at runtime to interpret program instructions on the host system.</p> Tip - EmitC alternate lowering path <p>VM instructions may be further lowered to C source code for static or resource constrained deployment.</p> <p>See the <code>--output-format=vm-c</code> compiler option and the samples in 
<code>samples/emitc_modules/</code> for more information.</p> <p>The VM supports generic operations like loads, stores, arithmetic, function calls, and control flow. The VM builds streams of more complex program logic and dense math into HAL command buffers that are dispatched to hardware backends.</p> <ul> <li>VM instances can serve multiple isolated execution contexts.</li> <li>VM contexts are effectively sandboxes for loading modules and running   programs.</li> <li> <p>VM modules provide all functionality to execution contexts, including   access to hardware accelerators through the HAL. Compiled user programs are   also modules.</p> <pre><code>stateDiagram-v2\n  accTitle: Sample VM Modules\n  accDescr {\n    Bytecode modules contain program state, program functions, and debug\n    information.\n    HAL modules contain devices, executables, HAL functions, and HAL types.\n    Custom modules may contain external functions and custom types.\n  }\n\n  state \"Bytecode module\" as bytecode {\n    bytecode_contents: Module state&lt;br&gt;Program functions&lt;br&gt;Debug information\n  }\n\n  state \"HAL module\" as HAL {\n    hal_contents: Devices&lt;br&gt;Executables&lt;br&gt;HAL functions&lt;br&gt;HAL types\n  }\n\n  state \"Custom module\" as custom {\n    custom_contents: External functions&lt;br&gt;Custom types\n  }</code></pre> </li> </ul>"},{"location":"reference/bindings/c-api/#hal","title":"HAL","text":"<p>IREE uses a Hardware Abstraction Layer (HAL) to model and interact with hardware devices like CPUs, GPUs and other accelerators.</p> <ul> <li>HAL drivers are used to enumerate and create HAL devices.</li> <li>HAL devices interface with hardware, such as by allocating device memory,   preparing executables, recording and dispatching command buffers, and   synchronizing with the host.</li> <li>HAL buffers represent data storage and buffer views represent views into   that storage with associated shapes and types (similar to \"tensors\").</li> 
</ul>"},{"location":"reference/bindings/c-api/#usage_1","title":"Usage","text":"<p>For other examples, see the samples below.</p> hello_world_terse.chello_world_explained.c <p>Source file: <code>runtime/src/iree/runtime/demo/hello_world_terse.c</code></p> runtime/src/iree/runtime/demo/hello_world_terse.c<pre><code>#include &lt;stdio.h&gt;\n\n#include \"iree/runtime/api.h\"\n#include \"iree/runtime/testdata/simple_mul_module_c.h\"\n\nstatic void iree_runtime_demo_run_session(iree_runtime_instance_t* instance);\nstatic void iree_runtime_demo_perform_mul(iree_runtime_session_t* session);\n\n//===----------------------------------------------------------------------===//\n// 1. Entry point / shared iree_runtime_instance_t setup\n//===----------------------------------------------------------------------===//\n\nint main(int argc, char** argv) {\n  // Create and configure the instance shared across all sessions.\n  iree_runtime_instance_options_t instance_options;\n  iree_runtime_instance_options_initialize(&amp;instance_options);\n  iree_runtime_instance_options_use_all_available_drivers(&amp;instance_options);\n  iree_runtime_instance_t* instance = NULL;\n  IREE_CHECK_OK(iree_runtime_instance_create(\n      &amp;instance_options, iree_allocator_system(), &amp;instance));\n\n  // All sessions should share the same instance.\n  iree_runtime_demo_run_session(instance);\n\n  iree_runtime_instance_release(instance);\n  return 0;\n}\n\n//===----------------------------------------------------------------------===//\n// 2. 
Load modules and initialize state in iree_runtime_session_t\n//===----------------------------------------------------------------------===//\n\nstatic void iree_runtime_demo_run_session(iree_runtime_instance_t* instance) {\n  // TODO(#5724): move device selection into the compiled modules.\n  iree_hal_device_t* device = NULL;\n  IREE_CHECK_OK(iree_runtime_instance_try_create_default_device(\n      instance, iree_make_cstring_view(\"local-task\"), &amp;device));\n\n  // Create one session per loaded module to hold the module state.\n  iree_runtime_session_options_t session_options;\n  iree_runtime_session_options_initialize(&amp;session_options);\n  iree_runtime_session_t* session = NULL;\n  IREE_CHECK_OK(iree_runtime_session_create_with_device(\n      instance, &amp;session_options, device,\n      iree_runtime_instance_host_allocator(instance), &amp;session));\n  iree_hal_device_release(device);\n\n  // Load your user module into the session (from memory, from file, etc).\n  const iree_file_toc_t* module_file =\n      iree_runtime_testdata_simple_mul_module_create();\n  IREE_CHECK_OK(iree_runtime_session_append_bytecode_module_from_memory(\n      session, iree_make_const_byte_span(module_file-&gt;data, module_file-&gt;size),\n      iree_allocator_null()));\n\n  // Run your functions; you should reuse the session to make multiple calls.\n  iree_runtime_demo_perform_mul(session);\n\n  iree_runtime_session_release(session);\n}\n\n//===----------------------------------------------------------------------===//\n// 3. 
Call a function within a module with buffer views\n//===----------------------------------------------------------------------===//\n\n// func.func @simple_mul(%arg0: tensor&lt;4xf32&gt;, %arg1: tensor&lt;4xf32&gt;) -&gt;\n// tensor&lt;4xf32&gt;\nstatic void iree_runtime_demo_perform_mul(iree_runtime_session_t* session) {\n  iree_runtime_call_t call;\n  IREE_CHECK_OK(iree_runtime_call_initialize_by_name(\n      session, iree_make_cstring_view(\"module.simple_mul\"), &amp;call));\n\n  // %arg0: tensor&lt;4xf32&gt;\n  iree_hal_buffer_view_t* arg0 = NULL;\n  static const iree_hal_dim_t arg0_shape[1] = {4};\n  static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};\n  IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(\n      iree_runtime_session_device(session),\n      iree_runtime_session_device_allocator(session),\n      IREE_ARRAYSIZE(arg0_shape), arg0_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n      IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n      (iree_hal_buffer_params_t){\n          .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n          .access = IREE_HAL_MEMORY_ACCESS_ALL,\n          .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n      },\n      iree_make_const_byte_span(arg0_data, sizeof(arg0_data)), &amp;arg0));\n  IREE_CHECK_OK(iree_hal_buffer_view_fprint(\n      stdout, arg0, /*max_element_count=*/4096,\n      iree_runtime_session_host_allocator(session)));\n  IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg0));\n  iree_hal_buffer_view_release(arg0);\n\n  fprintf(stdout, \"\\n * \\n\");\n\n  // %arg1: tensor&lt;4xf32&gt;\n  iree_hal_buffer_view_t* arg1 = NULL;\n  static const iree_hal_dim_t arg1_shape[1] = {4};\n  static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};\n  IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(\n      iree_runtime_session_device(session),\n      iree_runtime_session_device_allocator(session),\n      IREE_ARRAYSIZE(arg1_shape), arg1_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n      
IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n      (iree_hal_buffer_params_t){\n          .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n          .access = IREE_HAL_MEMORY_ACCESS_ALL,\n          .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n      },\n      iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &amp;arg1));\n  IREE_CHECK_OK(iree_hal_buffer_view_fprint(\n      stdout, arg1, /*max_element_count=*/4096,\n      iree_runtime_session_host_allocator(session)));\n  IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg1));\n  iree_hal_buffer_view_release(arg1);\n\n  IREE_CHECK_OK(iree_runtime_call_invoke(&amp;call, /*flags=*/0));\n\n  fprintf(stdout, \"\\n = \\n\");\n\n  // -&gt; tensor&lt;4xf32&gt;\n  iree_hal_buffer_view_t* ret0 = NULL;\n  IREE_CHECK_OK(iree_runtime_call_outputs_pop_front_buffer_view(&amp;call, &amp;ret0));\n  IREE_CHECK_OK(iree_hal_buffer_view_fprint(\n      stdout, ret0, /*max_element_count=*/4096,\n      iree_runtime_session_host_allocator(session)));\n  iree_hal_buffer_view_release(ret0);\n\n  iree_runtime_call_deinitialize(&amp;call);\n}\n</code></pre> <p>Source file: <code>runtime/src/iree/runtime/demo/hello_world_explained.c</code></p> runtime/src/iree/runtime/demo/hello_world_explained.c<pre><code>#include &lt;stdio.h&gt;\n\n#include \"iree/runtime/api.h\"\n\nstatic int iree_runtime_demo_main(void);\nstatic iree_status_t iree_runtime_demo_run_session(\n    iree_runtime_instance_t* instance);\nstatic iree_status_t iree_runtime_demo_perform_mul(\n    iree_runtime_session_t* session);\n\n#if defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_COMMAND_LINE_ARG)\n\nstatic const char* demo_file_path = NULL;\n\n// Takes the first argument on the command line as a file path and loads it.\nint main(int argc, char** argv) {\n  if (argc &lt; 2) {\n    fprintf(stderr, \"usage: session_demo module_file.vmfb\\n\");\n    return 1;\n  }\n  demo_file_path = argv[1];\n  return iree_runtime_demo_main();\n}\n\n// Loads a compiled IREE module from 
the file system.\nstatic iree_status_t iree_runtime_demo_load_module(\n    iree_runtime_session_t* session) {\n  return iree_runtime_session_append_bytecode_module_from_file(session,\n                                                               demo_file_path);\n}\n\n#elif defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_EMBEDDED_DATA)\n\n#include \"iree/runtime/testdata/simple_mul_module_c.h\"\n\nint main(int argc, char** argv) { return iree_runtime_demo_main(); }\n\n// Loads the bytecode module directly from memory.\n//\n// Embedding the compiled output into your binary is not always possible (or\n// recommended) but is a fairly painless way to get things working on a variety\n// of targets without worrying about how to deploy files or pass flags.\n//\n// In cases like this the module file is in .rodata and does not need to be\n// freed; if the memory needs to be released when the module is unloaded then a\n// custom allocator can be provided to get a callback instead.\nstatic iree_status_t iree_runtime_demo_load_module(\n    iree_runtime_session_t* session) {\n  const iree_file_toc_t* module_file =\n      iree_runtime_testdata_simple_mul_module_create();\n  return iree_runtime_session_append_bytecode_module_from_memory(\n      session, iree_make_const_byte_span(module_file-&gt;data, module_file-&gt;size),\n      iree_allocator_null());\n}\n\n#else\n#error \"must specify a way to load the module data\"\n#endif  // IREE_RUNTIME_DEMO_LOAD_FILE_FROM_*\n\n//===----------------------------------------------------------------------===//\n// 1. Entry point / shared iree_runtime_instance_t setup\n//===----------------------------------------------------------------------===//\n// Applications should create and share a single instance across all sessions.\n\n// This would live in your application startup/shutdown code or scoped to the\n// usage of IREE. 
Creating and destroying instances is expensive and should be\n// avoided.\nstatic int iree_runtime_demo_main(void) {\n  // Set up the shared runtime instance.\n  // An application should usually only have one of these and share it across\n  // all of the sessions it has. The instance is thread-safe, while the\n  // sessions are only thread-compatible (you need to lock if it's required).\n  iree_runtime_instance_options_t instance_options;\n  iree_runtime_instance_options_initialize(&amp;instance_options);\n  iree_runtime_instance_options_use_all_available_drivers(&amp;instance_options);\n  iree_runtime_instance_t* instance = NULL;\n  iree_status_t status = iree_runtime_instance_create(\n      &amp;instance_options, iree_allocator_system(), &amp;instance);\n\n  // Run the demo.\n  // A real application would load its models (at startup, on-demand, etc) and\n  // retain them somewhere to be reused. Startup time and likelihood of failure\n  // varies across different HAL backends; the synchronous CPU backend is nearly\n  // instantaneous and will never fail (unless out of memory) while the Vulkan\n  // backend may take significantly longer and fail if there are not supported\n  // devices.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_demo_run_session(instance);\n  }\n\n  // Release the shared instance - it will be deallocated when all sessions\n  // using it have been released (here it is deallocated immediately).\n  iree_runtime_instance_release(instance);\n\n  int ret = (int)iree_status_code(status);\n  if (!iree_status_is_ok(status)) {\n    // Dump nice status messages to stderr on failure.\n    // An application can route these through its own logging infrastructure as\n    // needed. Note that the status is a handle and must be freed!\n    iree_status_fprint(stderr, status);\n    iree_status_ignore(status);\n  }\n  return ret;\n}\n\n//===----------------------------------------------------------------------===//\n// 2. 
Load modules and initialize state in iree_runtime_session_t\n//===----------------------------------------------------------------------===//\n// Each instantiation of a module will live in its own session. Module state\n// like variables will be retained across calls within the same session.\n\n// Loads the demo module and uses it to perform some math.\n// In a real application you'd want to hang on to the iree_runtime_session_t\n// and reuse it for future calls - especially if it holds state internally.\nstatic iree_status_t iree_runtime_demo_run_session(\n    iree_runtime_instance_t* instance) {\n  // TODO(#5724): move device selection into the compiled modules.\n  iree_hal_device_t* device = NULL;\n  IREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(\n      instance, iree_make_cstring_view(\"local-task\"), &amp;device));\n\n  // Set up the session to run the demo module.\n  // Sessions are like OS processes and are used to isolate modules from each\n  // other and hold runtime state such as the variables used within the module.\n  // The same module loaded into two sessions will see their own private state.\n  iree_runtime_session_options_t session_options;\n  iree_runtime_session_options_initialize(&amp;session_options);\n  iree_runtime_session_t* session = NULL;\n  iree_status_t status = iree_runtime_session_create_with_device(\n      instance, &amp;session_options, device,\n      iree_runtime_instance_host_allocator(instance), &amp;session);\n  iree_hal_device_release(device);\n\n  // Load the compiled user module in a demo-specific way.\n  // Applications could specify files, embed the outputs directly in their\n  // binaries, fetch them over the network, etc.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_demo_load_module(session);\n  }\n\n  // Build and issue the call.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_demo_perform_mul(session);\n  }\n\n  // Release the session and free all resources.\n  
iree_runtime_session_release(session);\n  return status;\n}\n\n//===----------------------------------------------------------------------===//\n// 3. Call a function within a module with buffer views\n//===----------------------------------------------------------------------===//\n// The inputs and outputs of a call are reusable across calls (and possibly\n// across sessions depending on device compatibility) and can be setup by the\n// application as needed. For example, an application could perform\n// multi-threaded buffer view creation and then issue the call from a single\n// thread when all inputs are ready. This simple demo just allocates them\n// per-call and throws them away.\n\n// Sets up and calls the simple_mul function and dumps the results:\n// func.func @simple_mul(%arg0: tensor&lt;4xf32&gt;, %arg1: tensor&lt;4xf32&gt;) -&gt;\n// tensor&lt;4xf32&gt;\n//\n// NOTE: this is a demo and as such this performs no memoization; a real\n// application could reuse a lot of these structures and cache lookups of\n// iree_vm_function_t to reduce the amount of per-call overhead.\nstatic iree_status_t iree_runtime_demo_perform_mul(\n    iree_runtime_session_t* session) {\n  // Initialize the call to the function.\n  iree_runtime_call_t call;\n  IREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(\n      session, iree_make_cstring_view(\"module.simple_mul\"), &amp;call));\n\n  // Append the function inputs with the HAL device allocator in use by the\n  // session. 
The buffers will be usable within the session and _may_ be usable\n  // in other sessions depending on whether they share a compatible device.\n  iree_hal_device_t* device = iree_runtime_session_device(session);\n  iree_hal_allocator_t* device_allocator =\n      iree_runtime_session_device_allocator(session);\n  iree_allocator_t host_allocator =\n      iree_runtime_session_host_allocator(session);\n  iree_status_t status = iree_ok_status();\n  {\n    // %arg0: tensor&lt;4xf32&gt;\n    iree_hal_buffer_view_t* arg0 = NULL;\n    if (iree_status_is_ok(status)) {\n      static const iree_hal_dim_t arg0_shape[1] = {4};\n      static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};\n      status = iree_hal_buffer_view_allocate_buffer_copy(\n          device, device_allocator,\n          // Shape rank and dimensions:\n          IREE_ARRAYSIZE(arg0_shape), arg0_shape,\n          // Element type:\n          IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n          // Encoding type:\n          IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n          (iree_hal_buffer_params_t){\n              // Where to allocate (host or device):\n              .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n              // Access to allow to this memory:\n              .access = IREE_HAL_MEMORY_ACCESS_ALL,\n              // Intended usage of the buffer (transfers, dispatches, etc):\n              .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n          },\n          // The actual heap buffer to wrap or clone and its allocator:\n          iree_make_const_byte_span(arg0_data, sizeof(arg0_data)),\n          // Buffer view + storage are returned and owned by the caller:\n          &amp;arg0);\n    }\n    if (iree_status_is_ok(status)) {\n      IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(\n          stdout, arg0, /*max_element_count=*/4096, host_allocator));\n      // Add to the call inputs list (which retains the buffer view).\n      status = iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg0);\n    }\n    
// Since the call retains the buffer view we can release it here.\n    iree_hal_buffer_view_release(arg0);\n\n    fprintf(stdout, \"\\n * \\n\");\n\n    // %arg1: tensor&lt;4xf32&gt;\n    iree_hal_buffer_view_t* arg1 = NULL;\n    if (iree_status_is_ok(status)) {\n      static const iree_hal_dim_t arg1_shape[1] = {4};\n      static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};\n      status = iree_hal_buffer_view_allocate_buffer_copy(\n          device, device_allocator, IREE_ARRAYSIZE(arg1_shape), arg1_shape,\n          IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n          IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n          (iree_hal_buffer_params_t){\n              .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n              .access = IREE_HAL_MEMORY_ACCESS_ALL,\n              .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n          },\n          iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &amp;arg1);\n    }\n    if (iree_status_is_ok(status)) {\n      IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(\n          stdout, arg1, /*max_element_count=*/4096, host_allocator));\n      status = iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg1);\n    }\n    iree_hal_buffer_view_release(arg1);\n  }\n\n  // Synchronously perform the call.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_call_invoke(&amp;call, /*flags=*/0);\n  }\n\n  fprintf(stdout, \"\\n = \\n\");\n\n  // Dump the function outputs.\n  iree_hal_buffer_view_t* ret0 = NULL;\n  if (iree_status_is_ok(status)) {\n    // Try to get the first call result as a buffer view.\n    status = iree_runtime_call_outputs_pop_front_buffer_view(&amp;call, &amp;ret0);\n  }\n  if (iree_status_is_ok(status)) {\n    // This prints the buffer view out but an application could read its\n    // contents, pass it to another call, etc.\n    status = iree_hal_buffer_view_fprint(\n        stdout, ret0, /*max_element_count=*/4096, host_allocator);\n  }\n  iree_hal_buffer_view_release(ret0);\n\n  
iree_runtime_call_deinitialize(&amp;call);\n  return status;\n}\n</code></pre>"},{"location":"reference/bindings/c-api/#samples_1","title":"Samples","text":"Project Source Description iree-org/iree-template-runtime-cmake <code>hello_world.c</code> Runtime application template iree-org/iree <code>runtime/demo/</code> In-tree demos of the high level runtime API iree-org/iree <code>samples/</code> In-tree sample applications iree-org/iree-experimental <code>runtime-library/</code> Shared runtime library builderBuilds <code>libireert.so</code> to aid development iml130/iree-template-cpp <code>simple_embedding.c</code> Demo integration into a project"},{"location":"reference/bindings/c-api/#compiler-runtime-jit","title":"Compiler + Runtime = JIT","text":"<p>The compiler and runtime APIs may be used together to build a \"just in time\" (JIT) execution engine. JIT compilation allows for last-minute specialization with no prior knowledge of target devices and avoids issues with version drift, but it can also constrain deployment options and usage scenarios.</p>"},{"location":"reference/bindings/python/","title":"Python bindings","text":"","tags":["Python"]},{"location":"reference/bindings/python/#overview","title":"Overview","text":"<p>IREE offers Python bindings split into several packages, covering different components:</p> PIP package name Description <code>iree-compiler</code> IREE's generic compiler tools and helpers <code>iree-runtime</code> IREE's runtime, including CPU and GPU backends <code>iree-tools-tf</code> Tools for importing from TensorFlow <code>iree-tools-tflite</code> Tools for importing from TensorFlow Lite <code>iree-jax</code> Tools for importing from JAX <p>Collectively, these packages allow for importing from frontends, compiling towards various targets, and executing compiled code on IREE's backends.</p>","tags":["Python"]},{"location":"reference/bindings/python/#prerequisites","title":"Prerequisites","text":"<p>To use IREE's Python bindings, you 
will first need to install Python 3 and pip, as needed.</p> Tip - Virtual environments <p>We recommend using virtual environments to manage python packages, such as through <code>venv</code> (about, tutorial):</p>  Linux macOS Windows <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\n.venv\\Scripts\\activate.bat\n</code></pre> <p>When done, run <code>deactivate</code>.</p>","tags":["Python"]},{"location":"reference/bindings/python/#installing-iree-packages","title":"Installing IREE packages","text":"","tags":["Python"]},{"location":"reference/bindings/python/#prebuilt-packages","title":"Prebuilt packages","text":"Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler \\\n  iree-runtime\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler \\\n  iree-runtime\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#building-from-source","title":"Building from source","text":"<p>See Building Python bindings page for instructions for building from source.</p>","tags":["Python"]},{"location":"reference/bindings/python/#usage","title":"Usage","text":"<p>Info - API reference pages</p> <p>API reference pages for IREE's runtime and compiler Python APIs are hosted on readthedocs.</p> <p>Documentation for the MLIR compiler Python APIs can be found at https://mlir.llvm.org/docs/Bindings/Python/.</p>","tags":["Python"]},{"location":"reference/bindings/python/#compile-a-program","title":"Compile a program","text":"<pre><code>from iree import compiler as ireec\n\n# Compile a module.\nINPUT_MLIR = \"\"\"\nmodule @arithmetic {\n  func.func @simple_mul(%arg0: tensor&lt;4xf32&gt;, %arg1: tensor&lt;4xf32&gt;) 
-&gt; tensor&lt;4xf32&gt; {\n    %0 = arith.mulf %arg0, %arg1 : tensor&lt;4xf32&gt;\n    return %0 : tensor&lt;4xf32&gt;\n  }\n}\n\"\"\"\n\n# Compile using the vmvx (reference) target:\ncompiled_flatbuffer = ireec.tools.compile_str(\n    INPUT_MLIR,\n    target_backends=[\"vmvx\"])\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#run-a-compiled-program","title":"Run a compiled program","text":"<pre><code>from iree import runtime as ireert\nimport numpy as np\n\n# Register the module with a runtime context.\n# Use the \"local-task\" CPU driver, which can load the vmvx executable:\nconfig = ireert.Config(\"local-task\")\nctx = ireert.SystemContext(config=config)\nvm_module = ireert.VmModule.copy_buffer(ctx.instance, compiled_flatbuffer)\nctx.add_vm_module(vm_module)\n\n# Invoke the function and print the result.\nprint(\"INVOKE simple_mul\")\narg0 = np.array([1., 2., 3., 4.], dtype=np.float32)\narg1 = np.array([4., 5., 6., 7.], dtype=np.float32)\nf = ctx.modules.arithmetic[\"simple_mul\"]\nresults = f(arg0, arg1).to_host()\nprint(\"Results:\", results)\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#samples","title":"Samples","text":"<p>Check out the samples in IREE's samples/colab/ directory and the iree-experimental repository for examples using the Python APIs.</p>","tags":["Python"]},{"location":"reference/bindings/python/#console-scripts","title":"Console scripts","text":"<p>The Python packages include console scripts for most of IREE's native tools like <code>iree-compile</code> and <code>iree-run-module</code>.  
After installing a package from pip, these should be added to your path automatically:</p> <pre><code>$ python -m pip install iree-runtime\n$ which iree-run-module\n\n/projects/.venv/Scripts/iree-run-module\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#profiling","title":"Profiling","text":"<p>The tools in the <code>iree-runtime</code> package support variants:</p> Variant name Description default Standard runtime tools tracy Runtime tools instrumented using the Tracy profiler <p>Switch between variants of the installed tools using the <code>IREE_PY_RUNTIME</code> environment variable:</p> <pre><code>IREE_PY_RUNTIME=tracy iree-run-module ...\n</code></pre> <p>See the developer documentation page on Profiling with Tracy for information on using Tracy.</p> <p>Tip - flushing profile data</p> <p>When writing a Python-based program that you want to profile you may need to insert IREE runtime calls to periodically flush the profile data:</p> <pre><code>device = ... # HalDevice\ndevice.flush_profiling()\n</code></pre>","tags":["Python"]},{"location":"reference/mlir-dialects/","title":"MLIR dialects","text":"<p>These pages contain automatically generated documentation for the MLIR dialects defined in the IREE repository. IREE also makes extensive use of dialects from the upstream MLIR repository, which are documented at https://mlir.llvm.org/docs/Dialects/.</p>"},{"location":"reference/mlir-dialects/#iree-internal-dialects","title":"IREE internal dialects","text":"<p>These dialects are an implementation detail of the IREE compiler, though they can be used by plugins and other advanced integrations. 
The sources for most of these dialects can be found in the <code>iree/compiler/Dialect/</code> directory.</p> Dialect Description Check Defines assertions for IREE tests Flow Models execution data flow and partitioning HAL Represents operations against the IREE HAL<sup>1</sup> HAL/Inline Inline HAL interop runtime module dialect HAL/Loader HAL inline executable loader runtime module dialect IO/Parameters External parameter resource management APIs LinalgExt Extensions to the Linalg dialect for specific operations Stream Model execution partitioning and scheduling Util Types and ops common across IREE subdialects VM Represents operations against an abstract virtual machine VMVX Virtual Machine Vector Extensions"},{"location":"reference/mlir-dialects/#iree-public-dialects","title":"IREE public dialects","text":"<p>The ops in these dialects are legal to include in compiler inputs. The sources for these dialects can be found in the <code>llvm-external-projects/iree-dialects/</code> directory that is designed to be used from other projects via LLVM's external projects mechanism.</p> Dialect Description IREEInput Structural ops legal as input to IREE's compiler IREEVectorExt Extensions to the Vector dialect for specific operations <ol> <li> <p>Hardware Abstraction Layer\u00a0\u21a9</p> </li> </ol>"},{"location":"reference/mlir-dialects/Check/","title":"Check","text":""},{"location":"reference/mlir-dialects/Check/#check-dialect","title":"'check' Dialect","text":"<p>A dialect implementing test assertions for IREE modules.</p> <ul> <li>'check' Dialect<ul> <li>Operations<ul> <li>check.expect_all_true (Check::ExpectAllTrueOp)</li> <li>check.expect_almost_eq (Check::ExpectAlmostEqOp)</li> <li>check.expect_almost_eq_const (Check::ExpectAlmostEqConstOp)</li> <li>check.expect_eq (Check::ExpectEqOp)</li> <li>check.expect_eq_const (Check::ExpectEqConstOp)</li> <li>check.expect_false (Check::ExpectFalseOp)</li> <li>check.expect_true (Check::ExpectTrueOp)</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/Check/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Check/#checkexpect_all_true-checkexpectalltrueop","title":"<code>check.expect_all_true</code> (Check::ExpectAllTrueOp)","text":"<p>Checks that the operand contains only values that are true</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_all_true` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $operand `)` attr-dict `:` type($operand)\n</code></pre> <p>Verifies that the operand contains true values, which are represented by any non-zero integer.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_all_true&lt;%device&gt;(%arg0) : !hal.buffer_view\ncheck.expect_all_true(%arg1) : tensor&lt;2x2xi32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands","title":"Operands:","text":"Operand Description <code>device</code> device <code>operand</code> buffer_view or tensor of signless integer values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq-checkexpectalmosteqop","title":"<code>check.expect_almost_eq</code> (Check::ExpectAlmostEqOp)","text":"<p>Checks that the operands are almost equal</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_almost_eq` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the buffer view or tensor operands with float elements are almost equal to within an implementation-defined \"reasonable\" tolerance.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_almost_eq(%arg0, %arg1) : tensor&lt;5xf32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_1","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> buffer_view or tensor of floating-point values <code>rhs</code> buffer_view or tensor of floating-point 
values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq_const-checkexpectalmosteqconstop","title":"<code>check.expect_almost_eq_const</code> (Check::ExpectAlmostEqConstOp)","text":"<p>Checks that the tensor operand is almost equal to some constant</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_almost_eq_const` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the tensor operand with float elements is almost equal to the constant attribute within an implementation-defined \"reasonable\" tolerance.</p> <p>Issues a non-fatal failure if the verification fails.</p> <p>This op is just a convenience wrapper around the expect_almost_eq op.</p> <pre><code>check.expect_almost_eq_const(%const0, dense&lt;[0.999999, 2.0]&gt; : tensor&lt;5xf32&gt;) : tensor&lt;5xf32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_2","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> tensor of floating-point values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq-checkexpecteqop","title":"<code>check.expect_eq</code> (Check::ExpectEqOp)","text":"<p>Checks that the tensor or buffer view operands are equal</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_eq` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the operands are exactly equal.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_eq(%arg0, %arg1) : tensor&lt;5xi32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_3","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> buffer_view or 
tensor of any type values <code>rhs</code> buffer_view or tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq_const-checkexpecteqconstop","title":"<code>check.expect_eq_const</code> (Check::ExpectEqConstOp)","text":"<p>Checks that the tensor operand is equal to some constant</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_eq_const` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the tensor operand is exactly equal to a constant attribute.</p> <p>Issues a non-fatal failure if the verification fails.</p> <p>This op is just a convenience wrapper around the expect_eq op.</p> <pre><code>check.expect_eq_const(%arg0, dense&lt;[1, 2]&gt; : tensor&lt;2xi32&gt;) : tensor&lt;2xi32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_4","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_false-checkexpectfalseop","title":"<code>check.expect_false</code> (Check::ExpectFalseOp)","text":"<p>Checks that the operand is false</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_false` `(` $operand `)` attr-dict `:` type($operand)\n</code></pre> <p>Verifies that the operand contains a false value, which is represented by zero.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_false(%arg0) : i32\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_5","title":"Operands:","text":"Operand Description <code>operand</code> signless integer"},{"location":"reference/mlir-dialects/Check/#checkexpect_true-checkexpecttrueop","title":"<code>check.expect_true</code> 
(Check::ExpectTrueOp)","text":"<p>Checks that the operand is true</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_true` `(` $operand `)` attr-dict `:` type($operand)\n</code></pre> <p>Verifies that the operand contains a true value, which is represented by any non-zero integer.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_true(%arg0) : i32\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_6","title":"Operands:","text":"Operand Description <code>operand</code> signless integer"},{"location":"reference/mlir-dialects/Flow/","title":"Flow","text":""},{"location":"reference/mlir-dialects/Flow/#flow-dialect","title":"'flow' Dialect","text":"<p>A dialect designed to model execution data flow and partitioning.</p> <p>The flow dialect is used to model regions of dense computation and the data flow between them. MLIR value-semantic tensors are used as the primary data type to allow SSA use-def to provide a bulk of the infrastructure required to perform the computation partitioning and outlining.</p> <p>The dialect is designed to ingest relatively high-level linear algebra via XLA HLO ops (that also operate on the value-semantic tensor types) and optionally MLIR standard ops for control flow and other actions. After conversion of any higher-level ops that have special semantics in the flow dialect, such as global variables, the rest are partitioned into regions containing simple and compatible computations. Finally, outlining moves the computations into executables and leaves only the execution flow encoded via dispatch operations.</p> <p>The primary unit of interest is a \"dispatch region\" containing compatible computations that can be scheduled together efficiently (and safely). \"Compatible\" here is specified as similarly shaped workloads that indicate how many invocations a computation can be parallelized across when running in a SPMD execution model. 
Though it depends on the particular runtime backends this more concretely means things like the untiled workload (or tiled workgroups) used in GPU dispatches or similar thread pool executors.</p> <p>After identification of the dispatchable regions a set of transformations performs folding and simplification to reduce the total number of dispatches. Heuristics are used in certain cases to more efficiently schedule special ops (such as GEMM) and the design is amenable to profile-guided analysis that can be added in the future.</p> <p>The resulting outlined executable modules containing the dispatchable code can be translated to one or more backends (such as SPIR-V for Vulkan, or LLVM IR for running on the CPU, etc). The IR that is outlined is untouched and in the input format (such as XLA HLO ops) allowing conversion using any MLIR target that supports ingesting such input. A few special ops are used to communicate statically available information such as the expected workload size, shapes of inputs and outputs, etc.</p> <ul> <li>'flow' Dialect<ul> <li>Operations<ul> <li>Collective communication ops<ul> <li>flow.channel.count (Flow::ChannelCountOp)</li> <li>flow.channel.default (Flow::ChannelDefaultOp)</li> <li>flow.channel.rank (Flow::ChannelRankOp)</li> <li>flow.channel.split (Flow::ChannelSplitOp)</li> <li>flow.collective.all_gather (Flow::CollectiveAllGatherOp)</li> <li>flow.collective.all_reduce (Flow::CollectiveAllReduceOp)</li> <li>flow.collective.all_to_all (Flow::CollectiveAllToAllOp)</li> <li>flow.collective.reduce_scatter (Flow::CollectiveReduceScatterOp)</li> <li>flow.collective.send_recv (Flow::CollectiveSendRecvOp)</li> </ul> </li> <li>Dispatch ops<ul> <li>flow.dispatch (Flow::DispatchOp)</li> </ul> </li> <li>Executable ops<ul> <li>flow.executable_end (Flow::ExecutableEndOp)</li> <li>flow.executable.export (Flow::ExecutableExportOp)</li> <li>flow.executable (Flow::ExecutableOp)</li> </ul> </li> <li>Partitioned region ops<ul> <li>flow.dispatch.region 
(Flow::DispatchRegionOp)</li> <li>flow.dispatch.tensor.load (Flow::DispatchTensorLoadOp)</li> <li>flow.dispatch.tensor.store (Flow::DispatchTensorStoreOp)</li> <li>flow.dispatch.tie_shape (Flow::DispatchTieShapeOp)</li> <li>flow.dispatch.workgroup.count (Flow::DispatchWorkgroupCountOp)</li> <li>flow.dispatch.workgroup.id (Flow::DispatchWorkgroupIDOp)</li> <li>flow.dispatch.workgroup.size (Flow::DispatchWorkgroupSizeOp)</li> <li>flow.dispatch.workgroups (Flow::DispatchWorkgroupsOp)</li> <li>flow.return (Flow::ReturnOp)</li> </ul> </li> <li>Streamable call ops<ul> <li>flow.call (Flow::CallOp)</li> <li>flow.func (Flow::FuncOp)</li> </ul> </li> <li>Tensor ops<ul> <li>flow.dispatch.workgroup_count_from_dag_root (Flow::DispatchWorkgroupCountFromDagRootOp)</li> <li>flow.dispatch.workgroup_count_from_slice (Flow::DispatchWorkgroupCountFromSliceOp)</li> <li>flow.dispatch.workload.ordinal (Flow::DispatchWorkloadOrdinalOp)</li> <li>flow.tensor.alloca (Flow::TensorAllocaOp)</li> <li>flow.tensor.bitcast (Flow::TensorBitCastOp)</li> <li>flow.tensor.clone (Flow::TensorCloneOp)</li> <li>flow.tensor.constant (Flow::TensorConstantOp)</li> <li>flow.tensor.empty (Flow::TensorEmptyOp)</li> <li>flow.tensor.load (Flow::TensorLoadOp)</li> <li>flow.tensor.reshape (Flow::TensorReshapeOp)</li> <li>flow.tensor.slice (Flow::TensorSliceOp)</li> <li>flow.tensor.splat (Flow::TensorSplatOp)</li> <li>flow.tensor.store (Flow::TensorStoreOp)</li> <li>flow.tensor.tie_shape (Flow::TensorTieShapeOp)</li> <li>flow.tensor.trace (Flow::TensorTraceOp)</li> <li>flow.tensor.update (Flow::TensorUpdateOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>DummyAttr</li> </ul> </li> <li>Type constraints<ul> <li>dispatch.tensor</li> <li>dispatch.tensor</li> <li>dispatch.tensor</li> </ul> </li> <li>Types<ul> <li>ChannelType</li> <li>DummyType</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/Flow/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Flow/#collective-communication-ops","title":"Collective communication ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowchannelcount-flowchannelcountop","title":"<code>flow.channel.count</code> (Flow::ChannelCountOp)","text":"<p>Returns the total number of participants in the group</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.count` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the total participant count in the collective communicator group.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands","title":"Operands:","text":"Operand Description <code>channel</code> a collecive communication channel"},{"location":"reference/mlir-dialects/Flow/#results","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowchanneldefault-flowchanneldefaultop","title":"<code>flow.channel.default</code> (Flow::ChannelDefaultOp)","text":"<p>Returns a default collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.default` ($group^)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a channel initialized using the runtime environment.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>group</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#results_1","title":"Results:","text":"Result Description <code>result</code> a collecive communication channel"},{"location":"reference/mlir-dialects/Flow/#flowchannelrank-flowchannelrankop","title":"<code>flow.channel.rank</code> (Flow::ChannelRankOp)","text":"<p>Returns the rank of the local participant in the group</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.rank` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank the channel represents as a participant in a collective group in <code>[0, count)</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_1","title":"Operands:","text":"Operand Description <code>channel</code> a collecive communication channel"},{"location":"reference/mlir-dialects/Flow/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowchannelsplit-flowchannelsplitop","title":"<code>flow.channel.split</code> (Flow::ChannelSplitOp)","text":"<p>Splits a collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.split` $channel `,` $color `,` $key\n              `:` type($channel) `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. 
Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_2","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel <code>color</code> index <code>key</code> index"},{"location":"reference/mlir-dialects/Flow/#results_3","title":"Results:","text":"Result Description <code>result</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_gather-flowcollectiveallgatherop","title":"<code>flow.collective.all_gather</code> (Flow::CollectiveAllGatherOp)","text":"<p>Performs all-gather operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.all_gather` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>It gathers data from all ranks and concatenates them on the 0-th dimension. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_3","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collecive communication channel"},{"location":"reference/mlir-dialects/Flow/#results_4","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_reduce-flowcollectiveallreduceop","title":"<code>flow.collective.all_reduce</code> (Flow::CollectiveAllReduceOp)","text":"<p>Performs all-reduce operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.all_reduce` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>The operation reduces data across all the ranks in the channel. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>reduction_op</code>mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_4","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_5","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_to_all-flowcollectivealltoallop","title":"<code>flow.collective.all_to_all</code> (Flow::CollectiveAllToAllOp)","text":"<p>Performs all-to-all operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.all_to_all` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>This operation mutually exchanges data across all of the ranks in the channel. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_5","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collecive communication channel"},{"location":"reference/mlir-dialects/Flow/#results_6","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivereduce_scatter-flowcollectivereducescatterop","title":"<code>flow.collective.reduce_scatter</code> (Flow::CollectiveReduceScatterOp)","text":"<p>Performs reduce and scatter operations</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.reduce_scatter` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>The operation reduces data across all the ranks in the channel and     scatters the result to each rank. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>reduction_op</code>mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_6","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_7","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivesend_recv-flowcollectivesendrecvop","title":"<code>flow.collective.send_recv</code> (Flow::CollectiveSendRecvOp)","text":"<p>Performs a grouped send and receive operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.send_recv` $element_type `,` $target `,` $source `,` $channel `,` $send `,` $recv `:`\n              `(` type($target) `,` type($source) `,` type($channel) `,` type($send) `,` type($recv) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>The operation sends data to the rank specified by send     and receives data from the rank specified by recv. If send is -1, this rank     will not send any data. If recv is -1, this rank will not receive any data     and the output will be all zeros. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_7","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel <code>send</code> index <code>recv</code> index"},{"location":"reference/mlir-dialects/Flow/#results_8","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatch-flowdispatchop","title":"<code>flow.dispatch</code> (Flow::DispatchOp)","text":"<p>A dispatch of workgroups across a grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch` custom&lt;DispatchEntryPoints&gt;($entry_points)\n              (`[` $workload^ `]`)? ``\n              `(` $arguments `)` attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n</code></pre> <p>Dispatches workgroups across a grid defined by the captured workload parameters carrying the information required to compute the workgroup count at runtime. 
The function for converting the workload into a 3D workgroup count is attached to the dispatch entry point and may contain arbitrary host logic.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_points</code>::mlir::ArrayAttrsymbol ref array attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_8","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_9","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#executable-ops","title":"Executable ops","text":"<p>Executables for outlined regions.</p>"},{"location":"reference/mlir-dialects/Flow/#flowexecutable_end-flowexecutableendop","title":"<code>flow.executable_end</code> (Flow::ExecutableEndOp)","text":"<p>Terminator pseudo-op for the executable op</p> <p>Syntax:</p> <pre><code>operation ::= `flow.executable_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Flow::ExecutableOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/Flow/#flowexecutableexport-flowexecutableexportop","title":"<code>flow.executable.export</code> (Flow::ExecutableExportOp)","text":"<p>Defines an executable entry point for dispatch operations</p> <p>Syntax:</p> <pre><code>operation ::= 
`flow.executable.export` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              custom&lt;SymbolAlias&gt;($sym_name, $function_ref)\n              custom&lt;WorkgroupCountRegion&gt;($workgroup_count)\n              attr-dict-with-keyword\n</code></pre> <p>Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal function.</p> <p>Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.</p> <p>Traits: <code>HasParent&lt;IREE::Flow::ExecutableOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_ref</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Flow/#flowexecutable-flowexecutableop","title":"<code>flow.executable</code> (Flow::ExecutableOp)","text":"<p>Generic executable module</p> <p>Syntax:</p> <pre><code>operation ::= `flow.executable` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>An executable module containing one or more public functions. 
The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Flow::ExecutableEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code>, <code>Util_ObjectLike</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#partitioned-region-ops","title":"Partitioned region ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchregion-flowdispatchregionop","title":"<code>flow.dispatch.region</code> (Flow::DispatchRegionOp)","text":"<p>A group of ops</p> <p>This op is a container/grouping of ops. It represents a fusion group before being lowered to a dispatch region. Ops are collected inside of the region body of the op. Values from parent regions can be captured. Results are yielded with a <code>return</code> terminator and returned from this op.</p> <p><code>dispatch.region</code> ops are lowered to <code>dispatch.workgroups</code> ops. Workgroups isolated from above. <code>dispatch.region</code> ops are a more lightweight abstraction for implementing fusion heuristics, i.e., the process of deciding which ops should form a dispatch region.</p> <p>This op also has a second region: <code>workload_count</code>. The arguments to the region represent the workload for the dispatch, and returns the number of workgroups for the dispatch. 
The region is lowered directly to <code>workload_count</code> region of <code>dispatch.workgroups</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_9","title":"Operands:","text":"Operand Description <code>result_dims</code> variadic of index <code>workload</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_10","title":"Results:","text":"Result Description <code>result</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorload-flowdispatchtensorloadop","title":"<code>flow.dispatch.tensor.load</code> (Flow::DispatchTensorLoadOp)","text":"<p>Loads a tensor from a dispatch input placeholder</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.tensor.load` $source\n              `,` `offsets` `=` custom&lt;DynamicIndexList&gt;(\n              $offsets, $static_offsets)\n              `,` `sizes` `=` custom&lt;DynamicIndexList&gt;(\n              $sizes, $static_sizes)\n              `,` `strides` `=` custom&lt;DynamicIndexList&gt;(\n              $strides, $static_strides)\n              attr-dict `:` type($source) (`{` $source_dims^ `}`)?  `-&gt;` type($result)\n</code></pre> <p>Loads an input tensor or subtensor from an input placeholder. 
As each workgroup executes concurrently all workgroups will receive identical loaded results of regions that may overlap.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OffsetSizeAndStrideOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>static_offsets</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_sizes</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_strides</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_10","title":"Operands:","text":"Operand Description <code>source</code> dispatch.tensor <code>source_dims</code> variadic of index <code>offsets</code> variadic of index <code>sizes</code> variadic of index <code>strides</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_11","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorstore-flowdispatchtensorstoreop","title":"<code>flow.dispatch.tensor.store</code> (Flow::DispatchTensorStoreOp)","text":"<p>Stores a tensor into a dispatch output placeholder</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.tensor.store` $value `,` $target\n              `,` `offsets` `=` custom&lt;DynamicIndexList&gt;(\n              $offsets, $static_offsets)\n              `,` `sizes` `=` custom&lt;DynamicIndexList&gt;(\n              $sizes, $static_sizes)\n              `,` `strides` `=` custom&lt;DynamicIndexList&gt;(\n              $strides, $static_strides)\n      
        attr-dict `:` type($value) `-&gt;` type($target) (`{` $target_dims^ `}`)?\n</code></pre> <p>Stores a tensor or subtensor into an output tensor placeholder. As each workgroup executes concurrently behavior is undefined if more than one workgroup stores into overlapping regions of the full output tensor.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>OffsetSizeAndStrideOpInterface</code>, <code>Util_ShapeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>static_offsets</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_sizes</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_strides</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_11","title":"Operands:","text":"Operand Description <code>value</code> ranked tensor of any type values <code>target</code> dispatch.tensor <code>target_dims</code> variadic of index <code>offsets</code> variadic of index <code>sizes</code> variadic of index <code>strides</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtie_shape-flowdispatchtieshapeop","title":"<code>flow.dispatch.tie_shape</code> (Flow::DispatchTieShapeOp)","text":"<p>Ties a runtime shape to a dispatch I/O argument</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.tie_shape` $operand attr-dict\n              `:` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Metadata op used to tie a runtime-computed shape with dynamic dimensions to a dispatch input/output argument. 
All uses of the argument should use the pass-through result of this op to allow for SSA-based shape resolution.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_12","title":"Operands:","text":"Operand Description <code>operand</code> dispatch.tensor <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_12","title":"Results:","text":"Result Description <code>result</code> dispatch.tensor"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupcount-flowdispatchworkgroupcountop","title":"<code>flow.dispatch.workgroup.count</code> (Flow::DispatchWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable.</p> <pre><code>%x = flow.dispatch.workgroup.count[0] : index\n%y = flow.dispatch.workgroup.count[1] : index\n%z = flow.dispatch.workgroup.count[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_13","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupid-flowdispatchworkgroupidop","title":"<code>flow.dispatch.workgroup.id</code> (Flow::DispatchWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current workgroup in the range of <code>[0, flow.dispatch.workgroup.count)</code> along each dimension.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable.</p> <pre><code>%x = flow.dispatch.workgroup.id[0] : index\n%y = flow.dispatch.workgroup.id[1] : index\n%z = flow.dispatch.workgroup.id[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_14","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupsize-flowdispatchworkgroupsizeop","title":"<code>flow.dispatch.workgroup.size</code> (Flow::DispatchWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Workgroup sizes are not determined at the flow dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable.</p> <pre><code>%x = flow.dispatch.workgroup.size[0] : index\n%y = flow.dispatch.workgroup.size[1] : index\n%z = flow.dispatch.workgroup.size[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_15","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroups-flowdispatchworkgroupsop","title":"<code>flow.dispatch.workgroups</code> (Flow::DispatchWorkgroupsOp)","text":"<p>A dispatch of workgroups across a 3-dimensional grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroups` (`[` $workload^ `]`)? ``\n              `(` $arguments `)` `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n              attr-dict-with-keyword\n              `=` `\\n` ` ` ` ` ` `\n              custom&lt;DispatchWorkgroupBody&gt;(ref(type($arguments)),\n              ref(type($results)),\n              $workgroup_body)\n              `` custom&lt;DispatchWorkgroupsCountRegion&gt;($workgroup_count)\n</code></pre> <p>Dispatches some number of workgroups across a 3-dimensional grid. 
The body region will be invoked for each workgroup with a unique <code>flow.dispatch.workgroup.id</code> in the range of <code>[0, flow.dispatch.workgroup.count)</code> (along each dimension XYZ).</p> <p>From the outside the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors.</p> <p>Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the <code>flow.dispatch.workgroup.*</code> ops to query the workgroup ID/count/size instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc).</p> <p>Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. 
The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it.</p> <pre><code>%r = flow.dispatch.workgroups[%c5, %c5](%0, %1)\n    : (tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt; =\n          (%arg0: !flow.dispatch.tensor&lt;readonly:tensor&lt;5x5xf32&gt;&gt;,\n           %arg1: !flow.dispatch.tensor&lt;readonly:tensor&lt;5xf32&gt;&gt;,\n           %arg2: !flow.dispatch.tensor&lt;writeonly:tensor&lt;5x5xf32&gt;&gt;) {\n  ...\n}\n</code></pre> <p>The number of results of the operation is equal to the number of results in the type signature (<code>(tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt;</code>). Each tensor argument and result in the type signature has a corresponding block argument of type <code>!flow.dispatch.tensor</code>. Furthermore, each argument has a corresponding <code>arguments</code> operand.</p> <p>There are no <code>arguments</code> operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, <code>-&gt; %0</code> would tie the first argument to the result. 
In that case, there would be no separate block argument for the result.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>ClosureOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_13","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_16","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowreturn-flowreturnop","title":"<code>flow.return</code> (Flow::ReturnOp)","text":"<p>Return from a flow.dispatch_region</p> <p>Syntax:</p> <pre><code>operation ::= `flow.return` attr-dict ($operands^ `:` type($operands))?\n</code></pre> <p>Returns the given values from the region and back to the host code.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_14","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#streamable-call-ops","title":"Streamable 
call ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowcall-flowcallop","title":"<code>flow.call</code> (Flow::CallOp)","text":"<p>Calls a streamable external host function</p> <p>Syntax:</p> <pre><code>operation ::= `flow.call` $callee\n              `(` $arguments `)` attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n</code></pre> <p>Calls a function taking/returning tensor values with stream semantics. Tensors have their shapes captured and may be tied to denote in-place operations. Asynchronous calls must have no side-effects.</p> <p>Note that returned tensors must have their shapes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the shape (avoid at all costs!) a separate func.call can be used outside of the stream to do so. If shapes are unknowable until the operation is performed it should be made as a normal asynchronous host call with 'coarse-fences' instead.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>CallOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_15","title":"Operands:","text":"Operand Description <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_17","title":"Results:","text":"Result Description <code>results</code> variadic of any 
type"},{"location":"reference/mlir-dialects/Flow/#flowfunc-flowfuncop","title":"<code>flow.func</code> (Flow::FuncOp)","text":"<p>Streamable function declaration</p> <p>Syntax:</p> <pre><code>operation ::= `flow.func` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              ``\n              custom&lt;ShapedFunctionSignature&gt;($function_type,\n              $tied_operands,\n              $arg_attrs,\n              $res_attrs)\n              attr-dict-with-keyword\n              ($body^)?\n</code></pre> <p>Declares a function that can be called as an asynchronous streaming operation via <code>flow.call</code>. Today only external functions are allowed.</p> <p>Traits: <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Flow/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_dag_root-flowdispatchworkgroupcountfromdagrootop","title":"<code>flow.dispatch.workgroup_count_from_dag_root</code> (Flow::DispatchWorkgroupCountFromDagRootOp)","text":"<p>Workgroup count computed based on iteration range of the root of the DAG     for ops within the dispatch.</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup_count_from_dag_root` attr-dict $operands\n</code></pre> <p>When using tile + distribution of the root of the 
DAG (Directed Acyclic Graph) of ops within the dispatch to split the work amongst workgroups. The workload captured is the size of the iteration space of the root of the DAG. This op represents the computation that given the workload returns the number of workgroups to use. The backends are responsible for lowering this op into actual computation (typically based on the tile sizes used to tile and distribute the root of the DAG).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_16","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_18","title":"Results:","text":"Result Description <code>x</code> index <code>y</code> index <code>z</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_slice-flowdispatchworkgroupcountfromsliceop","title":"<code>flow.dispatch.workgroup_count_from_slice</code> (Flow::DispatchWorkgroupCountFromSliceOp)","text":"<p>Place holder to signify default workgroup count calculation.</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup_count_from_slice` attr-dict $operands\n</code></pre> <p>The default computation of the number of workgroups (or workgroup count) assumes that the dispatch + captured values is enough to compute the workgroup count. It does so by using a program slice of the values within the dispatch that represent the number of workgroups when available within the dispatch. Currently the arguments of index types captured by the <code>flow.dispatch.workgroups</code> is treated as the workload for the operation. 
It is a requirement that the slice of the program that computes the number of workgroups will need to have its leaves be these captured values.</p> <p>TODO: This could be generalized in future to allow the slices to encompass arbitrary computation. The computation of the workgroup count can then be done on the device itself, if this is data dependent. In such cases the workload could be more than just values of index types.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_17","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_19","title":"Results:","text":"Result Description <code>x</code> index <code>y</code> index <code>z</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkloadordinal-flowdispatchworkloadordinalop","title":"<code>flow.dispatch.workload.ordinal</code> (Flow::DispatchWorkloadOrdinalOp)","text":"<p>Annotates the values captured as workload within the body of     <code>flow.dispatch.workgroups</code> op.</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workload.ordinal` attr-dict $operand `,` $ordinal `:` type($operand)\n</code></pre> <p>The arguments that represent the captured/returned values of the <code>flow.dispatch.workgroups</code> op, i.e. the signature of the body of the op, are not preserved during IREE's compilation. Since the workloads are derived from the operands captured by the operation, this op denotes the values captured as workloads. 
This can be used in the backends to map back to the workload values while materializing the workgroup count computation.</p> <p>TODO: Find a better way to represent this information, either by somehow propagating the signature of the created dispatch workgroup op through the compilation stack until the codegen backends, or as a separate list/attribute that can be plumbed through without using explicit ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>ordinal</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#operands_18","title":"Operands:","text":"Operand Description <code>operand</code> index"},{"location":"reference/mlir-dialects/Flow/#results_20","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowtensoralloca-flowtensorallocaop","title":"<code>flow.tensor.alloca</code> (Flow::TensorAllocaOp)","text":"<p>An empty tensor allocation with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.alloca` `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new transient tensor allocation with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read. The resulting tensor may be long-lived and allocated as part of a dedicated allocation. Prefer using <code>flow.tensor.empty</code> whenever possible as this op disables nearly all allocation-related optimizations performed by the compiler. 
The presence of this op is often an indication of an improper lowering.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_19","title":"Operands:","text":"Operand Description <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_21","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorbitcast-flowtensorbitcastop","title":"<code>flow.tensor.bitcast</code> (Flow::TensorBitCastOp)","text":"<p>Bitcasts a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.bitcast` $source `:`\n              type($source) (`{` $source_dims^ `}`)? `-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Bitcasts a tensor to a new type without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_20","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_22","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorclone-flowtensorcloneop","title":"<code>flow.tensor.clone</code> 
(Flow::TensorCloneOp)","text":"<p>Performs a full tensor clone operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.clone` $operand `:` type($result) (`{` $argument_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones the input tensor into an identical output tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_21","title":"Operands:","text":"Operand Description <code>operand</code> ranked tensor of any type values <code>argument_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_23","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorconstant-flowtensorconstantop","title":"<code>flow.tensor.constant</code> (Flow::TensorConstantOp)","text":"<p>Tensor constant that can have dynamic dimensions</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.constant` $value attr-dict `-&gt;` type($result)\n</code></pre> <p>Allows specifying a constant where the return value can erase shape information. This operation is declared as having side effects and has no folder, so will not be optimized away by the compiler. 
The underlying shape information should be hidden from the compiler and resolved at runtime.</p> <pre><code>%c = flow.tensor.constant tensor&lt;2x2xf32&gt; -&gt; tensor&lt;?x?xf32&gt;\n%res = math.absf %c : tensor&lt;?x?xf32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Flow/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Flow/#results_24","title":"Results:","text":"Result Description <code>result</code> tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorempty-flowtensoremptyop","title":"<code>flow.tensor.empty</code> (Flow::TensorEmptyOp)","text":"<p>An empty tensor carrying metadata but no contents</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.empty` `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_22","title":"Operands:","text":"Operand Description <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_25","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorload-flowtensorloadop","title":"<code>flow.tensor.load</code> (Flow::TensorLoadOp)","text":"<p>Loads a value from a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.load` $source (`[` $indices^ `]`)? 
`:`\n              type($source) (`{` $source_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_23","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_26","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorreshape-flowtensorreshapeop","title":"<code>flow.tensor.reshape</code> (Flow::TensorReshapeOp)","text":"<p>Reshapes a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.reshape` $source `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Reshapes a tensor to a new shape without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_24","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_27","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorslice-flowtensorsliceop","title":"<code>flow.tensor.slice</code> (Flow::TensorSliceOp)","text":"<p>Slices out a subregion of a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones a subregion of a tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_25","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_28","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorsplat-flowtensorsplatop","title":"<code>flow.tensor.splat</code> (Flow::TensorSplatOp)","text":"<p>Splats a value into a shaped tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor initialized to the given primitive value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_26","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_29","title":"Results:","text":"Result 
Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorstore-flowtensorstoreop","title":"<code>flow.tensor.store</code> (Flow::TensorStoreOp)","text":"<p>Stores a value into a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n              type($target) (`{` $target_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with the element at the given index set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_27","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type or vector of any type values <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_30","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortie_shape-flowtensortieshapeop","title":"<code>flow.tensor.tie_shape</code> (Flow::TensorTieShapeOp)","text":"<p>Ties a runtime shape to a tensor value</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.tie_shape` $operand attr-dict\n              `:` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Metadata op used to tie tensors with their runtime-computed dynamic dimensions. 
This only exists transiently in the IR as a witness to shape calculations and is removed during lowering.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_28","title":"Operands:","text":"Operand Description <code>operand</code> ranked tensor of any type values <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_31","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortrace-flowtensortraceop","title":"<code>flow.tensor.trace</code> (Flow::TensorTraceOp)","text":"<p>Traces one or more tensor values at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.trace` $key `=` `[`\n              custom&lt;ShapedOperandList&gt;($values, type($values), $value_dims)\n              `]` attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given tensors. 
The key is arbitrary and can be used for identifying the set of values being traced.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ShapeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#operands_29","title":"Operands:","text":"Operand Description <code>values</code> variadic of ranked tensor of any type values <code>value_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowtensorupdate-flowtensorupdateop","title":"<code>flow.tensor.update</code> (Flow::TensorUpdateOp)","text":"<p>Updates a tensor with the contents of another tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n              type($update) (`{` $update_dims^ `}`)? `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims)\n              attr-dict-with-keyword\n</code></pre> <p>Updates the target tensor with the contents of the update tensor at the given offset indices.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_30","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>start_indices</code> variadic of index <code>update</code> ranked tensor of any type values <code>update_dims</code> variadic of 
index"},{"location":"reference/mlir-dialects/Flow/#results_32","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#attributes_20","title":"Attributes","text":""},{"location":"reference/mlir-dialects/Flow/#dummyattr","title":"DummyAttr","text":"<p>Syntax: <code>#flow.dummy</code></p>"},{"location":"reference/mlir-dialects/Flow/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/Flow/#dispatchtensor","title":"dispatch.tensor","text":"<p>A placeholder for a dispatch region input/output operand. This can be used to query the metadata about the tensor (such as its shape) as well as both load and store from the backing tensor representation.</p>"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_1","title":"dispatch.tensor","text":"<p>A placeholder for a dispatch region input operand. This can be used to query the metadata about the tensor (such as its shape) as well as load from the backing tensor representation.</p>"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_2","title":"dispatch.tensor","text":"<p>A placeholder for a dispatch region output operand. This can be used to query the metadata about the tensor (such as its shape) as well as store to the backing tensor representation.</p>"},{"location":"reference/mlir-dialects/Flow/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/Flow/#channeltype","title":"ChannelType","text":"<p>a collective communication channel</p> <p>Syntax: <code>!flow.channel</code></p> <p>Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.</p> <p>In programs that have already been partitioned prior to being compiled there will often exist only one channel and <code>flow.channel.default</code> can be used to reference it. 
In programs that model SPMD behavior internally channels can be created or provided by hosting applications.</p>"},{"location":"reference/mlir-dialects/Flow/#dummytype","title":"DummyType","text":"<p>Syntax: <code>!flow.dummy</code></p>"},{"location":"reference/mlir-dialects/HAL/","title":"HAL","text":""},{"location":"reference/mlir-dialects/HAL/#hal-dialect","title":"'hal' Dialect","text":"<p>A dialect representing operations against the IREE HAL.</p> <p>This can be thought of as a Vulkan-like model with all of the graphics bits chopped out.</p> <p>The type set is limited to those that can be represented in the IREE HAL design: buffers and views, synchronization primitives like semaphores, and command buffers. The intent is that if a device could implement the HAL interface the sequencer ops could run on that device, such as being able to run on a GPU via indirect command buffers.</p> <p>Though this is mostly a 1:1 mapping to the iree::hal API there are some methods omitted as they are not likely to be needed in IR. 
It's assumed that either sequencer interfaces will encapsulate the logic (such as device resolution) or that certain features are unsafe to expose to user-defined input.</p> <ul> <li>'hal' Dialect<ul> <li>Operations<ul> <li>Allocator ops<ul> <li>hal.allocator.allocate (HAL::AllocatorAllocateOp)</li> <li>hal.allocator.import (HAL::AllocatorImportOp)</li> </ul> </li> <li>Buffer ops<ul> <li>hal.buffer.assert (HAL::BufferAssertOp)</li> <li>hal.buffer.length (HAL::BufferLengthOp)</li> <li>hal.buffer.load (HAL::BufferLoadOp)</li> <li>hal.buffer.store (HAL::BufferStoreOp)</li> <li>hal.buffer.subspan (HAL::BufferSubspanOp)</li> </ul> </li> <li>Buffer view ops<ul> <li>hal.buffer_view.assert (HAL::BufferViewAssertOp)</li> <li>hal.buffer_view.buffer (HAL::BufferViewBufferOp)</li> <li>hal.buffer_view.create (HAL::BufferViewCreateOp)</li> <li>hal.buffer_view.dim (HAL::BufferViewDimOp)</li> <li>hal.buffer_view.element_type (HAL::BufferViewElementTypeOp)</li> <li>hal.buffer_view.encoding_type (HAL::BufferViewEncodingTypeOp)</li> <li>hal.buffer_view.rank (HAL::BufferViewRankOp)</li> <li>hal.buffer_view.trace (HAL::BufferViewTraceOp)</li> <li>hal.element_type (HAL::ElementTypeOp)</li> <li>hal.encoding_type (HAL::EncodingTypeOp)</li> </ul> </li> <li>Channel ops<ul> <li>hal.channel.create (HAL::ChannelCreateOp)</li> <li>hal.channel.rank_and_count (HAL::ChannelRankAndCountOp)</li> <li>hal.channel.split (HAL::ChannelSplitOp)</li> </ul> </li> <li>Command buffer ops<ul> <li>hal.command_buffer.begin_debug_group (HAL::CommandBufferBeginDebugGroupOp)</li> <li>hal.command_buffer.collective (HAL::CommandBufferCollectiveOp)</li> <li>hal.command_buffer.copy_buffer (HAL::CommandBufferCopyBufferOp)</li> <li>hal.command_buffer.create (HAL::CommandBufferCreateOp)</li> <li>hal.command_buffer.device (HAL::CommandBufferDeviceOp)</li> <li>hal.command_buffer.dispatch.indirect (HAL::CommandBufferDispatchIndirectOp)</li> <li>hal.command_buffer.dispatch.indirect.symbol 
(HAL::CommandBufferDispatchIndirectSymbolOp)</li> <li>hal.command_buffer.dispatch (HAL::CommandBufferDispatchOp)</li> <li>hal.command_buffer.dispatch.symbol (HAL::CommandBufferDispatchSymbolOp)</li> <li>hal.command_buffer.end_debug_group (HAL::CommandBufferEndDebugGroupOp)</li> <li>hal.command_buffer.execution_barrier (HAL::CommandBufferExecutionBarrierOp)</li> <li>hal.command_buffer.fill_buffer (HAL::CommandBufferFillBufferOp)</li> <li>hal.command_buffer.finalize (HAL::CommandBufferFinalizeOp)</li> <li>hal.command_buffer.push_constants (HAL::CommandBufferPushConstantsOp)</li> <li>hal.command_buffer.push_descriptor_set (HAL::CommandBufferPushDescriptorSetOp)</li> </ul> </li> <li>Descriptor set layout ops<ul> <li>hal.descriptor_set_layout.create (HAL::DescriptorSetLayoutCreateOp)</li> </ul> </li> <li>Device management ops<ul> <li>hal.devices.count (HAL::DevicesCountOp)</li> <li>hal.devices.get (HAL::DevicesGetOp)</li> </ul> </li> <li>Device ops<ul> <li>hal.device.allocator (HAL::DeviceAllocatorOp)</li> <li>hal.device.query (HAL::DeviceQueryOp)</li> <li>hal.device.queue.alloca (HAL::DeviceQueueAllocaOp)</li> <li>hal.device.queue.dealloca (HAL::DeviceQueueDeallocaOp)</li> <li>hal.device.queue.execute (HAL::DeviceQueueExecuteOp)</li> <li>hal.device.queue.flush (HAL::DeviceQueueFlushOp)</li> <li>hal.device.queue.read (HAL::DeviceQueueReadOp)</li> <li>hal.device.queue.write (HAL::DeviceQueueWriteOp)</li> <li>hal.return (HAL::ReturnOp)</li> </ul> </li> <li>Executable ops<ul> <li>hal.executable.binary (HAL::ExecutableBinaryOp)</li> <li>hal.executable.calculate_workgroups (HAL::ExecutableCalculateWorkgroupsOp)</li> <li>hal.executable.condition (HAL::ExecutableConditionOp)</li> <li>hal.executable.constant.block (HAL::ExecutableConstantBlockOp)</li> <li>hal.executable.constant.load (HAL::ExecutableConstantLoadOp)</li> <li>hal.executable.create (HAL::ExecutableCreateOp)</li> <li>hal.executable_end (HAL::ExecutableEndOp)</li> <li>hal.executable.export 
(HAL::ExecutableExportOp)</li> <li>hal.executable.lookup (HAL::ExecutableLookupOp)</li> <li>hal.executable (HAL::ExecutableOp)</li> <li>hal.executable.source_end (HAL::ExecutableSourceEndOp)</li> <li>hal.executable.source (HAL::ExecutableSourceOp)</li> <li>hal.executable.variant_end (HAL::ExecutableVariantEndOp)</li> <li>hal.executable.variant (HAL::ExecutableVariantOp)</li> </ul> </li> <li>Experimental ops<ul> <li>hal.ex.file.from_memory (HAL::ExFileFromMemoryOp)</li> </ul> </li> <li>Fence ops<ul> <li>hal.fence.await (HAL::FenceAwaitOp)</li> <li>hal.fence.create (HAL::FenceCreateOp)</li> <li>hal.fence.fail (HAL::FenceFailOp)</li> <li>hal.fence.join (HAL::FenceJoinOp)</li> <li>hal.fence.query (HAL::FenceQueryOp)</li> <li>hal.fence.signal (HAL::FenceSignalOp)</li> </ul> </li> <li>Instrument ops<ul> <li>hal.instrument.memory.load (HAL::InstrumentMemoryLoadOp)</li> <li>hal.instrument.memory.store (HAL::InstrumentMemoryStoreOp)</li> <li>hal.instrument.print (HAL::InstrumentPrintOp)</li> <li>hal.instrument.value (HAL::InstrumentValueOp)</li> <li>hal.instrument.workgroup (HAL::InstrumentWorkgroupOp)</li> </ul> </li> <li>Interface ops<ul> <li>hal.interface.binding.subspan (HAL::InterfaceBindingSubspanOp)</li> <li>hal.interface.constant.load (HAL::InterfaceConstantLoadOp)</li> <li>hal.interface.workgroup.count (HAL::InterfaceWorkgroupCountOp)</li> <li>hal.interface.workgroup.id (HAL::InterfaceWorkgroupIDOp)</li> <li>hal.interface.workgroup.size (HAL::InterfaceWorkgroupSizeOp)</li> </ul> </li> <li>Pipeline layout ops<ul> <li>hal.pipeline_layout.create (HAL::PipelineLayoutCreateOp)</li> <li>hal.pipeline_layout.lookup (HAL::PipelineLayoutLookupOp)</li> </ul> </li> <li>Pseudo Ops<ul> <li>hal.dispatch.extern (HAL::DispatchExternOp)</li> <li>hal.tensor.barrier (HAL::TensorBarrierOp)</li> <li>hal.tensor.export (HAL::TensorExportOp)</li> <li>hal.tensor.import (HAL::TensorImportOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>AffinityQueueAttr</li> <li>CollectiveAttr</li> 
<li>DescriptorSetBindingAttr</li> <li>DescriptorSetLayoutAttr</li> <li>DescriptorTypeAttr</li> <li>DeviceTargetAttr</li> <li>ExecutableObjectAttr</li> <li>ExecutableObjectsAttr</li> <li>ExecutableTargetAttr</li> <li>InterfaceBindingAttr</li> <li>PipelineLayoutAttr</li> </ul> </li> <li>Type constraints<ul> <li>allocator</li> <li>buffer</li> <li>buffer_view</li> <li>collective.channel</li> <li>command_buffer</li> <li>descriptor_set_layout</li> <li>device</li> <li>event</li> <li>executable</li> <li>fence</li> <li>buffer</li> <li>pipeline_layout</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/HAL/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/HAL/#allocator-ops","title":"Allocator ops","text":"<p>Ops for <code>!hal.allocator</code> / <code>iree_hal_allocator_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halallocatorallocate-halallocatorallocateop","title":"<code>hal.allocator.allocate</code> (HAL::AllocatorAllocateOp)","text":"<p>Empty buffer allocation operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.allocator.allocate` `&lt;` $allocator `:` type($allocator) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), $result_size)\n              attr-dict-with-keyword\n</code></pre> <p>Allocates a buffer of the given size from the allocator. 
The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands","title":"Operands:","text":"Operand Description <code>allocator</code> allocator <code>queue_affinity</code> 64-bit signless integer <code>result_size</code> index"},{"location":"reference/mlir-dialects/HAL/#results","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#halallocatorimport-halallocatorimportop","title":"<code>hal.allocator.import</code> (HAL::AllocatorImportOp)","text":"<p>Allocator-supported host buffer import operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.allocator.import` `&lt;` $allocator `:` type($allocator) `&gt;`\n              `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n              `affinity` `(` $queue_affinity `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              `:` type($did_import) `,` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Tries importing host memory backed by the given byte buffer into a device accessible <code>!hal.buffer</code>. The returned buffer may be host-only and not directly usable on devices. 
If the mapping cannot be completed (such as trying to map the host memory as device-local on devices with discrete memory) then <code>did_import</code> will indicate that the returned buffer is null.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_1","title":"Operands:","text":"Operand Description <code>allocator</code> allocator <code>queue_affinity</code> 64-bit signless integer <code>source</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#results_1","title":"Results:","text":"Result Description <code>did_import</code> 1-bit signless integer <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-ops","title":"Buffer ops","text":"<p>Ops for <code>!hal.buffer</code> / <code>iree_hal_buffer_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halbufferassert-halbufferassertop","title":"<code>hal.buffer.assert</code> (HAL::BufferAssertOp)","text":"<p>Buffer compatibility assertion</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.assert` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `message` `(` $message `)`\n              `allocator` `(` $allocator `:` type($allocator) `)`\n              `minimum_length` `(` $minimum_length `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              attr-dict-with-keyword\n</code></pre> <p>Asserts that the buffer is compatible with the given allocator and usage. 
Program execution will abort as if <code>std.assert</code> had been used.</p> <p>This only checks that the buffer can be used and not that it matches the given parameters exactly. Buffers may be from other allocators so long as the allocators are compatible (devices can address each other's memory), the type and usage contain all the requested bits (having more bits is ok), and the length is at least the requested minimum (as padding may be ignored).</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_2","title":"Operands:","text":"Operand Description <code>buffer</code> buffer <code>allocator</code> allocator <code>minimum_length</code> index"},{"location":"reference/mlir-dialects/HAL/#halbufferlength-halbufferlengthop","title":"<code>hal.buffer.length</code> (HAL::BufferLengthOp)","text":"<p>Buffer byte length accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.length` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the allocated size of a buffer in bytes. 
May be less than the underlying buffer allocation if this is a subspan or view into another buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_3","title":"Operands:","text":"Operand Description <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HAL/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halbufferload-halbufferloadop","title":"<code>hal.buffer.load</code> (HAL::BufferLoadOp)","text":"<p>Buffer element load operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.load` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Loads a value from a buffer by mapping it.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_4","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#results_3","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/HAL/#halbufferstore-halbufferstoreop","title":"<code>hal.buffer.store</code> (HAL::BufferStoreOp)","text":"<p>Buffer element store operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.store` `&lt;` $target_buffer `:` type($target_buffer) `&gt;`\n              
`` `[` $target_offset `]`\n              `value` `(` $value `:` type($value) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Stores a value into a buffer by mapping it.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_5","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type or vector of any type values <code>target_buffer</code> buffer <code>target_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#halbuffersubspan-halbuffersubspanop","title":"<code>hal.buffer.subspan</code> (HAL::BufferSubspanOp)","text":"<p>Buffer subspan operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.subspan` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a reference to a subspan of the buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_6","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#results_4","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-view-ops","title":"Buffer view ops","text":"<p>Ops for <code>!hal.buffer_view</code> / <code>iree_hal_buffer_view_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewassert-halbufferviewassertop","title":"<code>hal.buffer_view.assert</code> (HAL::BufferViewAssertOp)","text":"<p>Buffer view contents assertion</p> <p>Syntax:</p> <pre><code>operation ::= 
`hal.buffer_view.assert` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `message` `(` $message `)`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              attr-dict-with-keyword\n</code></pre> <p>Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if <code>std.assert</code> had been used.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_7","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewbuffer-halbufferviewbufferop","title":"<code>hal.buffer_view.buffer</code> (HAL::BufferViewBufferOp)","text":"<p>Buffer view buffer accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.buffer` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the buffer backing this view's contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_8","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_5","title":"Results:","text":"Result Description <code>result</code> 
buffer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewcreate-halbufferviewcreateop","title":"<code>hal.buffer_view.create</code> (HAL::BufferViewCreateOp)","text":"<p>Buffer view reference initializer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `,` $source_length `]`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_9","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>source_length</code> index <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_6","title":"Results:","text":"Result Description <code>result</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewdim-halbufferviewdimop","title":"<code>hal.buffer_view.dim</code> (HAL::BufferViewDimOp)","text":"<p>Buffer view dimension value query</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.dim` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `` `[` $index `]`\n              `:` type($result)\n              
attr-dict-with-keyword\n</code></pre> <p>Returns the value of the given dimension.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_10","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_7","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewelement_type-halbufferviewelementtypeop","title":"<code>hal.buffer_view.element_type</code> (HAL::BufferViewElementTypeOp)","text":"<p>Buffer view element type query</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.element_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_11","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_8","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewencoding_type-halbufferviewencodingtypeop","title":"<code>hal.buffer_view.encoding_type</code> (HAL::BufferViewEncodingTypeOp)","text":"<p>Buffer view encoding type query</p> <p>Syntax:</p> 
<pre><code>operation ::= `hal.buffer_view.encoding_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the encoding type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_12","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_9","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewrank-halbufferviewrankop","title":"<code>hal.buffer_view.rank</code> (HAL::BufferViewRankOp)","text":"<p>Buffer view rank query</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.rank` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_13","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_10","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewtrace-halbufferviewtraceop","title":"<code>hal.buffer_view.trace</code> (HAL::BufferViewTraceOp)","text":"<p>Trace value(s) operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.trace` $key `=`\n              $operands `:` type($operands)\n     
         attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. The key is informational only and useful for titling/marking specific sets of buffers for easier searching.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_14","title":"Operands:","text":"Operand Description <code>operands</code> variadic of buffer_view"},{"location":"reference/mlir-dialects/HAL/#halelement_type-halelementtypeop","title":"<code>hal.element_type</code> (HAL::ElementTypeOp)","text":"<p>An iree_hal_element_type_t for the given MLIR type</p> <p>Syntax:</p> <pre><code>operation ::= `hal.element_type` `&lt;` $type `&gt;`\n              attr-dict\n              `:` type($result)\n</code></pre> <p>Maps an MLIR type to a runtime <code>iree_hal_element_type_t</code> value for all types that are convertible.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>type</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/HAL/#results_11","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halencoding_type-halencodingtypeop","title":"<code>hal.encoding_type</code> (HAL::EncodingTypeOp)","text":"<p>An iree_hal_encoding_type_t for the given MLIR encoding</p> <p>Syntax:</p> <pre><code>operation ::= `hal.encoding_type` `&lt;` ($encoding^):( `` `dense_row_major`)? 
`&gt;`\n              attr-dict\n              `:` type($result)\n</code></pre> <p>Maps an MLIR encoding to a runtime <code>iree_hal_encoding_type_t</code> value for all encodings that are convertible.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>encoding</code>::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/HAL/#results_12","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#channel-ops","title":"Channel ops","text":"<p>Ops for <code>!hal.channel</code> / <code>iree_hal_channel_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halchannelcreate-halchannelcreateop","title":"<code>hal.channel.create</code> (HAL::ChannelCreateOp)","text":"<p>Creates a new channel for collective communication</p> <p>Syntax:</p> <pre><code>operation ::= `hal.channel.create` `device` `(` $device `:` type($device) `)`\n              `affinity` `(` $queue_affinity `)`\n              `flags` `(` $flags `)`\n              `id` `(` $id `)`\n              `group` `(` $group `)`\n              `rank` `(` $rank `)`\n              `count` `(` $count `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new channel with the given rank associated with the given device queue. Collective operations using this channel must only be submitted on compatible queues.</p> <p>The group and ID are optional and may be null. 
A rank or count of -1 can be used to indicate a default inherited from the environment or device configuration.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_15","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>id</code> a reference counted byte buffer <code>group</code> a reference counted byte buffer <code>rank</code> 32-bit signless integer <code>count</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_13","title":"Results:","text":"Result Description <code>result</code> collective.channel"},{"location":"reference/mlir-dialects/HAL/#halchannelrank_and_count-halchannelrankandcountop","title":"<code>hal.channel.rank_and_count</code> (HAL::ChannelRankAndCountOp)","text":"<p>Returns the rank of the local participant in the group</p> <p>Syntax:</p> <pre><code>operation ::= `hal.channel.rank_and_count` `&lt;` $channel `:` type($channel) `&gt;`\n              `:` type($rank) `,` type($count)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank the channel represents as a participant in a collective group in <code>[0, count)</code> and the total participant count.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_16","title":"Operands:","text":"Operand Description <code>channel</code> collective.channel"},{"location":"reference/mlir-dialects/HAL/#results_14","title":"Results:","text":"Result Description <code>rank</code> 32-bit signless 
integer <code>count</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halchannelsplit-halchannelsplitop","title":"<code>hal.channel.split</code> (HAL::ChannelSplitOp)","text":"<p>Splits a collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `hal.channel.split` `&lt;` $channel `:` type($channel) `&gt;`\n              `color` `(` $color `)`\n              `key` `(` $key `)`\n              `flags` `(` $flags `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_17","title":"Operands:","text":"Operand Description <code>channel</code> collective.channel <code>color</code> 32-bit signless integer <code>key</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_15","title":"Results:","text":"Result Description <code>result</code> collective.channel"},{"location":"reference/mlir-dialects/HAL/#command-buffer-ops","title":"Command buffer ops","text":"<p>Ops for <code>!hal.command_buffer</code> / <code>iree_hal_command_buffer_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferbegin_debug_group-halcommandbufferbegindebuggroupop","title":"<code>hal.command_buffer.begin_debug_group</code> 
(HAL::CommandBufferBeginDebugGroupOp)","text":"<p>Pushes a command buffer debug group label</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.begin_debug_group` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `label` `(` $label `)`\n              attr-dict-with-keyword\n</code></pre> <p>Pushes a new debug group with the given label. All commands between this and a mandatory matching call to <code>hal.command_buffer.end_debug_group</code> will be grouped together with the given label.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>label</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_18","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercollective-halcommandbuffercollectiveop","title":"<code>hal.command_buffer.collective</code> (HAL::CommandBufferCollectiveOp)","text":"<p>Command buffer collective dispatch recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.collective` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `channel` `(` $channel `:` type($channel) `)`\n              `op` `(` $op `)`\n              (`param` `(` $param^ `:` type($param) `)`)?\n              (`send` `(` $send_buffer^ `:` type($send_buffer) `)`\n              `` `[` $send_offset `,` $send_length `]`)?\n              (`recv` `(` $recv_buffer^ `:` type($recv_buffer) `)`\n              `` `[` $recv_offset `,` $recv_length `]`)?\n              `count` `(` $element_count `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches a collective operation defined by op using the given buffers.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription 
<code>op</code>::mlir::iree_compiler::IREE::HAL::CollectiveAttrcollective operation and specification"},{"location":"reference/mlir-dialects/HAL/#operands_19","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>channel</code> collective.channel <code>element_count</code> index <code>param</code> 32-bit signless integer <code>send_buffer</code> buffer <code>send_offset</code> index <code>send_length</code> index <code>recv_buffer</code> buffer <code>recv_offset</code> index <code>recv_length</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercopy_buffer-halcommandbuffercopybufferop","title":"<code>hal.command_buffer.copy_buffer</code> (HAL::CommandBufferCopyBufferOp)","text":"<p>Command buffer buffer copy recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.copy_buffer` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `source` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `]`\n              `target` `(` $target_buffer `:` type($target_buffer) `)`\n              `` `[` $target_offset `]`\n              `length` `(` $length `)`\n              attr-dict-with-keyword\n</code></pre> <p>Copies a range of one buffer to another.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_20","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>source_buffer</code> buffer <code>source_offset</code> index <code>target_buffer</code> buffer <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercreate-halcommandbuffercreateop","title":"<code>hal.command_buffer.create</code> (HAL::CommandBufferCreateOp)","text":"<p>Command buffer allocation operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.create` `device` `(` $device `:` type($device) `)`\n              `mode` `(` $modes `)`\n              
`categories` `(` $command_categories `)`\n              (`bindings` `(` $binding_capacity^ `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a command buffer from the device pool ready to begin recording.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>modes</code>mlir::iree_compiler::IREE::HAL::CommandBufferModeBitfieldAttrvalid CommandBufferMode <code>command_categories</code>mlir::iree_compiler::IREE::HAL::CommandCategoryBitfieldAttrvalid CommandCategory"},{"location":"reference/mlir-dialects/HAL/#operands_21","title":"Operands:","text":"Operand Description <code>device</code> device <code>binding_capacity</code> index"},{"location":"reference/mlir-dialects/HAL/#results_16","title":"Results:","text":"Result Description <code>result</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdevice-halcommandbufferdeviceop","title":"<code>hal.command_buffer.device</code> (HAL::CommandBufferDeviceOp)","text":"<p>Command buffer device query operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.device` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `:` type($device)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to access the device used to create a command buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_22","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#results_17","title":"Results:","text":"Result Description <code>device</code> 
device"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirect-halcommandbufferdispatchindirectop","title":"<code>hal.command_buffer.dispatch.indirect</code> (HAL::CommandBufferDispatchIndirectOp)","text":"<p>Command buffer indirect dispatch recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch.indirect` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $executable `:` type($executable) `)`\n              `` `[` $entry_point `]`\n              `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n              `` `[` $workgroups_offset `]`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request with the dispatch parameters loaded from the given buffer.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_23","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>executable</code> executable <code>workgroups_buffer</code> buffer <code>workgroups_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirectsymbol-halcommandbufferdispatchindirectsymbolop","title":"<code>hal.command_buffer.dispatch.indirect.symbol</code> (HAL::CommandBufferDispatchIndirectSymbolOp)","text":"<p>Command buffer indirect dispatch recording operation, using symbolref</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch.indirect.symbol` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $entry_point `)`\n              `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n              `` `[` $workgroups_offset `]`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request with the dispatch parameters loaded 
from the given buffer, using a nested symbol reference to the entry point.</p> <pre><code>hal.command_buffer.dispatch.indirect.symbol %cmd, @executable::@target::@entry,\n                                            workgroups = %buffer[%offset]\n</code></pre>"},{"location":"reference/mlir-dialects/HAL/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_24","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>workgroups_buffer</code> buffer <code>workgroups_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatch-halcommandbufferdispatchop","title":"<code>hal.command_buffer.dispatch</code> (HAL::CommandBufferDispatchOp)","text":"<p>Command buffer dispatch recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $executable `:` type($executable) `)`\n              `` `[` $entry_point `]`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_25","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>executable</code> executable <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> 
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchsymbol-halcommandbufferdispatchsymbolop","title":"<code>hal.command_buffer.dispatch.symbol</code> (HAL::CommandBufferDispatchSymbolOp)","text":"<p>Command buffer dispatch recording operation, using symbolref</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch.symbol` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $entry_point `)`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request, using a nested symbol reference to the entry point.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_26","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferend_debug_group-halcommandbufferenddebuggroupop","title":"<code>hal.command_buffer.end_debug_group</code> (HAL::CommandBufferEndDebugGroupOp)","text":"<p>Pops a command buffer debug group label</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.end_debug_group` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              attr-dict-with-keyword\n</code></pre> <p>Pops a debug group from the stack.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_27","title":"Operands:","text":"Operand Description <code>command_buffer</code> 
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferexecution_barrier-halcommandbufferexecutionbarrierop","title":"<code>hal.command_buffer.execution_barrier</code> (HAL::CommandBufferExecutionBarrierOp)","text":"<p>Command buffer execution barrier recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.execution_barrier` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `source` `(` $source_stage_mask `)`\n              `target` `(` $target_stage_mask `)`\n              `flags` `(` $flags `)`\n              attr-dict-with-keyword\n</code></pre> <p>Defines an execution dependency between all commands recorded before the barrier and all commands recorded after the barrier. Only the stages provided will be affected.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_stage_mask</code>mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage <code>target_stage_mask</code>mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage <code>flags</code>mlir::iree_compiler::IREE::HAL::ExecutionBarrierFlagBitfieldAttrvalid ExecutionBarrierFlag"},{"location":"reference/mlir-dialects/HAL/#operands_28","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfill_buffer-halcommandbufferfillbufferop","title":"<code>hal.command_buffer.fill_buffer</code> (HAL::CommandBufferFillBufferOp)","text":"<p>Command buffer buffer fill recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.fill_buffer` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $target_buffer `:` type($target_buffer) `)`\n              `` `[` $target_offset `,` $length `]`\n              `pattern` `(` $pattern `:` type($pattern) `)`\n              
attr-dict-with-keyword\n</code></pre> <p>Fills the target buffer with the given repeating value.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_29","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>target_buffer</code> buffer <code>target_offset</code> index <code>length</code> index <code>pattern</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfinalize-halcommandbufferfinalizeop","title":"<code>hal.command_buffer.finalize</code> (HAL::CommandBufferFinalizeOp)","text":"<p>Finalizes command buffer recording</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.finalize` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              attr-dict-with-keyword\n</code></pre> <p>Ends recording into the command buffer and prepares it for submission. No more commands may be recorded into the command buffer.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_30","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_constants-halcommandbufferpushconstantsop","title":"<code>hal.command_buffer.push_constants</code> (HAL::CommandBufferPushConstantsOp)","text":"<p>Command buffer push constants operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.push_constants` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n              `offset` `(` $offset `)`\n              `values` `(` `[` $values `]` `)`\n              `:` type($values)\n              attr-dict-with-keyword\n</code></pre> <p>Pushes an inline set of constants that can be accessed by subsequent dispatches using a compatible pipeline layout.</p> <p>Push constants are always 4-byte values and treated as opaque, meaning that they may be bit-casted 
floats, bit-packed booleans, etc.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>offset</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_31","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>pipeline_layout</code> pipeline_layout <code>values</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_descriptor_set-halcommandbufferpushdescriptorsetop","title":"<code>hal.command_buffer.push_descriptor_set</code> (HAL::CommandBufferPushDescriptorSetOp)","text":"<p>Command buffer descriptor set push binding operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.push_descriptor_set` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n              `` `[` $set `]`\n              `bindings` `(` `[`\n              custom&lt;DescriptorSetBindings&gt;($binding_ordinals,\n              $binding_buffers,\n              type($binding_buffers),\n              $binding_offsets,\n              $binding_lengths)\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Pushes an inline-defined descriptor set to the command buffer. 
The provided buffers may either be HAL buffers or indirect references into the command buffer binding table.</p> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_32","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>pipeline_layout</code> pipeline_layout <code>set</code> index <code>binding_ordinals</code> variadic of index <code>binding_buffers</code> variadic of index or buffer <code>binding_offsets</code> variadic of index <code>binding_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#descriptor-set-layout-ops","title":"Descriptor set layout ops","text":"<p>Ops for <code>!hal.descriptor_set_layout</code> / <code>iree_hal_descriptor_set_layout_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#haldescriptor_set_layoutcreate-haldescriptorsetlayoutcreateop","title":"<code>hal.descriptor_set_layout.create</code> (HAL::DescriptorSetLayoutCreateOp)","text":"<p>Creates a descriptor set layout</p> <p>Syntax:</p> <pre><code>operation ::= `hal.descriptor_set_layout.create` `device` `(` $device `:` type($device) `)`\n              `flags` `(` $flags `)`\n              `bindings` `(` $bindings `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a descriptor set layout that defines the bindings used within a set. 
The same descriptor set layout may be shared with many different executable layouts and by doing so some runtime binding overhead when switching between executables that use the same set layouts can be reduced.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlagsAttrvalid DescriptorSetLayout flags <code>bindings</code>::mlir::ArrayAttrHAL descriptor set layout binding array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_33","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_18","title":"Results:","text":"Result Description <code>result</code> descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#device-management-ops","title":"Device management ops","text":"<p>Device availability and selection support.</p>"},{"location":"reference/mlir-dialects/HAL/#haldevicescount-haldevicescountop","title":"<code>hal.devices.count</code> (HAL::DevicesCountOp)","text":"<p>Returns the number of available devices</p> <p>Syntax:</p> <pre><code>operation ::= `hal.devices.count` attr-dict `:` type($result)\n</code></pre> <p>Returns the total number of available devices registered at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#results_19","title":"Results:","text":"Result Description <code>result</code> 
index"},{"location":"reference/mlir-dialects/HAL/#haldevicesget-haldevicesgetop","title":"<code>hal.devices.get</code> (HAL::DevicesGetOp)","text":"<p>Returns the device with the given index</p> <p>Syntax:</p> <pre><code>operation ::= `hal.devices.get` $index attr-dict `:` type($result)\n</code></pre> <p>Returns the device with the given index in the [0, hal.devices.count) range. Devices may be lazily initialized upon first use.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_34","title":"Operands:","text":"Operand Description <code>index</code> index"},{"location":"reference/mlir-dialects/HAL/#results_20","title":"Results:","text":"Result Description <code>result</code> device"},{"location":"reference/mlir-dialects/HAL/#device-ops","title":"Device ops","text":"<p>Ops for <code>!hal.device</code> / <code>iree_hal_device_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#haldeviceallocator-haldeviceallocatorop","title":"<code>hal.device.allocator</code> (HAL::DeviceAllocatorOp)","text":"<p>Device allocator accessor operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.allocator` `&lt;` $device `:` type($device) `&gt;` `:` type($result) attr-dict-with-keyword\n</code></pre> <p>Returns the allocator that can be used to allocate buffers compatible with the device.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_35","title":"Operands:","text":"Operand Description <code>device</code> 
device"},{"location":"reference/mlir-dialects/HAL/#results_21","title":"Results:","text":"Result Description <code>result</code> allocator"},{"location":"reference/mlir-dialects/HAL/#haldevicequery-haldevicequeryop","title":"<code>hal.device.query</code> (HAL::DeviceQueryOp)","text":"<p>Returns a runtime configuration parameter from the device</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.query` `&lt;` $device `:` type($device) `&gt;`\n              `key` `(` $category `:` `` `:` $key `)`\n              `:` type($ok) `,` type($value)\n              (`=` $default_value^)?\n              attr-dict-with-keyword\n</code></pre> <p>Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and, if it was, the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.</p> <p>This is roughly equivalent to the <code>sysconf</code> Linux library function (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent.</p> <p>Users of the op must check the <code>ok</code> result before using the value because the set of available keys may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. 
Keys should be prefixed with <code>ex.</code> when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.</p> <p>Well-known keys:</p> <ul> <li> <p>hal.device.id :: {some id pattern}   Returns 1 if the device identifier matches the given pattern string.</p> </li> <li> <p>hal.executable.format :: {some format pattern}   Returns 1 if the given format is supported by the device loader.</p> </li> <li> <p>hal.device :: concurrency   The maximum concurrently executable submissions, mapping roughly to the   queue count. The actual concurrency available may be less than this based   on dynamic runtime parameters such as power/thermal modes, quota limits,   or user choice.</p> </li> <li> <p>hal.dispatch :: concurrency   The maximum concurrently executable workgroups for a particular dispatch.   The actual concurrency available may be less depending on device state.</p> </li> </ul> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>category</code>::mlir::StringAttrstring attribute <code>key</code>::mlir::StringAttrstring attribute <code>default_value</code>::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/HAL/#operands_36","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_22","title":"Results:","text":"Result Description <code>ok</code> 1-bit signless integer <code>value</code> any type"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuealloca-haldevicequeueallocaop","title":"<code>hal.device.queue.alloca</code> (HAL::DeviceQueueAllocaOp)","text":"<p>Allocates a queue-ordered transient buffer</p> <p>Syntax:</p> 
<pre><code>operation ::= `hal.device.queue.alloca` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `pool` `(` $pool `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), $result_size)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a queue-ordered transient buffer that will be available for use when the signal fence is reached. The allocation will not be made until the wait fence has been reached.</p> <p>The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.</p> <p>The buffer handle will remain live so long as there are retainers but the contents are undefined before the allocation signal fence has been signaled and after the deallocation wait fence has been reached.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_37","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>pool</code> 64-bit signless integer <code>result_size</code> index"},{"location":"reference/mlir-dialects/HAL/#results_23","title":"Results:","text":"Result Description <code>result</code> 
buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuedealloca-haldevicequeuedeallocaop","title":"<code>hal.device.queue.dealloca</code> (HAL::DeviceQueueDeallocaOp)","text":"<p>Deallocates a queue-ordered transient buffer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.dealloca` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `buffer` `(` $buffer `:` type($buffer) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Deallocates a queue-ordered transient buffer. The deallocation will not be made until the wait fence has been reached and once the storage is available for reuse the signal fence will be signaled.</p> <p>After deallocation the contents of the buffer may still be accessible but will have undefined contents as other operations reuse the memory.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_38","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueexecute-haldevicequeueexecuteop","title":"<code>hal.device.queue.execute</code> (HAL::DeviceQueueExecuteOp)","text":"<p>Enqueues command buffer execution</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.execute` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              (`commands` `(` `[` $command_buffers^ `]` `)`)?\n              attr-dict-with-keyword\n</code></pre> <p>Executes one or more command buffers on a device queue. The command buffers are executed in order as if they were recorded as one. 
No commands will execute until the wait fence has been reached and the signal fence will be signaled when all commands have completed.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_39","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>command_buffers</code> variadic of command_buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueflush-haldevicequeueflushop","title":"<code>hal.device.queue.flush</code> (HAL::DeviceQueueFlushOp)","text":"<p>Flushes locally-pending submissions to the queue</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.flush` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              attr-dict-with-keyword\n</code></pre> <p>Flushes any locally-pending submissions in the queue. When submitting many queue operations this can be used to eagerly flush earlier submissions while later ones are still being constructed. 
This may be a no-op.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_40","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueread-haldevicequeuereadop","title":"<code>hal.device.queue.read</code> (HAL::DeviceQueueReadOp)","text":"<p>Reads a segment from a file into a device buffer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.read` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `source` `(` $source_file `:` type($source_file) `)`\n              `` `[` $source_offset `]`\n              `target` `(` $target_buffer `:` type($target_buffer) `)`\n              `` `[` $target_offset `]`\n              `length` `(` $length `)`\n              `flags` `(` $flags `)`\n              attr-dict-with-keyword\n</code></pre> <p>Enqueues a file read operation that streams a segment of the source file defined by the source offset and length into the target HAL buffer at the specified target offset. The queue affinity should be set to where the target buffer will be consumed. The source file must have read permission and the target buffer must have transfer-target usage. 
Read failure will result in propagated semaphore failure or device loss.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_41","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_file</code> buffer <code>source_offset</code> 64-bit signless integer <code>target_buffer</code> buffer <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuewrite-haldevicequeuewriteop","title":"<code>hal.device.queue.write</code> (HAL::DeviceQueueWriteOp)","text":"<p>Writes a segment from a device buffer into a file</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.write` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `source` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `]`\n              `target` `(` $target_file `:` type($target_file) `)`\n              `` `[` $target_offset `]`\n              `length` `(` $length `)`\n              `flags` `(` $flags `)`\n              attr-dict-with-keyword\n</code></pre> <p>Enqueues a file write operation that streams a segment of the source HAL buffer defined by the source offset and length into the target file at the specified target offset. The queue affinity should be set to where the source buffer was produced. The source buffer must have transfer-source usage and the target file must have write permission. 
Write failure will result in propagated semaphore failure or device loss.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_42","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_buffer</code> buffer <code>source_offset</code> index <code>target_file</code> buffer <code>target_offset</code> 64-bit signless integer <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#halreturn-halreturnop","title":"<code>hal.return</code> (HAL::ReturnOp)","text":"<p>Return from a hal.* region</p> <p>Syntax:</p> <pre><code>operation ::= `hal.return` ($operands^ `:` type($operands))? attr-dict\n</code></pre> <p>Returns the given values from the region and back to the host code.</p> <p>Traits: <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_43","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/HAL/#executable-ops","title":"Executable ops","text":"<p>Ops for <code>!hal.executable</code> / <code>iree_hal_executable_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halexecutablebinary-halexecutablebinaryop","title":"<code>hal.executable.binary</code> (HAL::ExecutableBinaryOp)","text":"<p>Compiled executable binary data</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.binary` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n</code></pre> <p>A compiled executable binary with an optional nested module containing the IR prior to serialization (for debugging).</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableOp&gt;</code></p> 
<p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>format</code>::mlir::StringAttrstring attribute <code>data</code>::mlir::DenseIntElementsAttr8-bit signless integer elements attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablecalculate_workgroups-halexecutablecalculateworkgroupsop","title":"<code>hal.executable.calculate_workgroups</code> (HAL::ExecutableCalculateWorkgroupsOp)","text":"<p>Calculates workgroup count from workload for an exported function</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.calculate_workgroups` `device` `(` $device `:` type($device) `)`\n              `target` `(` $entry_point `)`\n              (`workload` `(` `[` $workload^ `]` `)`)?\n              `:` type($workgroup_x) `,` type($workgroup_y) `,` type($workgroup_z)\n              attr-dict-with-keyword\n</code></pre> <p>Calculates the workgroup count (grid XYZ) based on the given workload using the workgroup count calculation region of the target <code>hal.executable.export</code> op.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_44","title":"Operands:","text":"Operand Description <code>device</code> device <code>workload</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_24","title":"Results:","text":"Result Description 
<code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index"},{"location":"reference/mlir-dialects/HAL/#halexecutablecondition-halexecutableconditionop","title":"<code>hal.executable.condition</code> (HAL::ExecutableConditionOp)","text":"<p>Host code to determine if the executable is enabled</p> <p>Variants are selected based on their target and this optional condition op that returns true if the variant is valid for use on the provided runtime <code>!hal.device</code>. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.</p> <p>Traits: <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantblock-halexecutableconstantblockop","title":"<code>hal.executable.constant.block</code> (HAL::ExecutableConstantBlockOp)","text":"<p>Executable constant block initializer</p> <p>Initializes one or more constants in the executable constant block by returning one value per identified constant. Each constant block is evaluated on the host prior to instantiating the executable for a given device and allows for the executable to be specialized based on device capabilities and limits.</p> <p>The keys specified are unique per variant and will be deduplicated across multiple constant blocks when present. 
They are only used during lowering and will not survive to runtime so they need only have descriptive enough names to avoid collisions and represent the semantics of the value.</p> <p>Constant values can be loaded in the device code with the <code>hal.executable.constant.load</code> op:</p> <pre><code>hal.executable.variant public @target {\n  hal.executable.constant.block(%device: !hal.device) -&gt; (i32, i32) as (\"foo\", \"bar\") {\n    %0 = hal.device.query&lt;%device&gt; key(\"some.device.prop\")...\n    %1 = hal.device.query&lt;%device&gt; key(\"another.device.prop\")...\n    hal.return %0, %1 : i32, i32\n  }\n  builtin.module {\n    func @dispatch0() {\n      %0 = hal.executable.constant.load \"foo\" : i32\n      %1 = hal.executable.constant.load \"bar\" : i32\n      return\n    }\n  }\n}\n</code></pre> <p>Each target backend will implement the constant initialization and access in a way compatible with its execution model. Examples: - CPU: read-only buffer initialized on load and passed to each dispatch - CUDA: read-only buffer initialized on load and passed to each dispatch - SPIR-V: specialization constants - Metal: function constants - WebGPU: pipeline-overridable constants</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableSourceOp, IREE::HAL::ExecutableVariantOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>keys</code>::mlir::ArrayAttrarray attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantload-halexecutableconstantloadop","title":"<code>hal.executable.constant.load</code> 
(HAL::ExecutableConstantLoadOp)","text":"<p>Loads a constant value from the executable constant block</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.constant.load` $key attr-dict `:` type($result)\n</code></pre> <p>Loads a scalar constant value from the static executable constant block. The value provided by a constant block with the given key will be loaded and bitcast (possibly with truncation or zero-extension) to the result type.</p> <p>Note that backends are allowed to implement their own mechanisms for referencing constant block values and this is provided only as a default for those not needing special behavior.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#results_25","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halexecutablecreate-halexecutablecreateop","title":"<code>hal.executable.create</code> (HAL::ExecutableCreateOp)","text":"<p>Creates an executable</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.create` `device` `(` $device `:` type($device) `)`\n              `target` `(` $executable_target `)`\n              `layouts` `(` `[` $layouts `]` `)`\n              (`constants` `(` `[` $constants^ `]` `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a target-dependent executable cached on the provided device. 
Entry points contained within the executable can be dispatched using the resulting executable handle.</p> <p>Depending on the driver creation may take a non-trivial amount of time (such as when JITing/etc). As the cache is internally synchronized callers can issue preparation requests from multiple threads - even for the same executables - and calls will block until preparation completes.</p> <p>Optional constants provide for specialization of the executable based on runtime-derived parameters.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable_target</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_45","title":"Operands:","text":"Operand Description <code>device</code> device <code>layouts</code> variadic of pipeline_layout <code>constants</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_26","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable_end-halexecutableendop","title":"<code>hal.executable_end</code> (HAL::ExecutableEndOp)","text":"<p>Terminator pseudo-op for the executable op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#halexecutableexport-halexecutableexportop","title":"<code>hal.executable.export</code> (HAL::ExecutableExportOp)","text":"<p>Executable entry point declaration</p> <p>An entry point exported by the 
executable with statically-available information describing the IO interface it uses and other dispatch metadata.</p> <p>The <code>workgroup_count</code> region represents the computation that returns the number of workgroups to use in the 3D grid dispatch. The arguments to the region represent the workload as captured by each dispatch. It returns the number of workgroups along x, y, and z.</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableSourceOp, IREE::HAL::ExecutableVariantOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrsize_t <code>layout</code>::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification <code>workgroup_size</code>::mlir::ArrayAttrindex array attribute <code>subgroup_size</code>::mlir::IntegerAttrsize_t <code>workgroup_local_memory</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablelookup-halexecutablelookupop","title":"<code>hal.executable.lookup</code> (HAL::ExecutableLookupOp)","text":"<p>Executable cache lookup pseudo-op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.lookup` `device` `(` $device `:` type($device) `)`\n              `executable` `(` $executable `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_46","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_27","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable-halexecutableop","title":"<code>hal.executable</code> (HAL::ExecutableOp)","text":"<p>Target-specific executable module</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>An executable module representing a target-specific compiled kernel/shader/etc.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::HAL::ExecutableEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code>, <code>Util_ObjectLike</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource_end-halexecutablesourceendop","title":"<code>hal.executable.source_end</code> (HAL::ExecutableSourceEndOp)","text":"<p>Terminator pseudo-op for the executable source op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.source_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableSourceOp&gt;</code>, 
<code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource-halexecutablesourceop","title":"<code>hal.executable.source</code> (HAL::ExecutableSourceOp)","text":"<p>Generic source contents of an executable op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.source` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              ``\n              $body\n</code></pre> <p>This is an unspecialized source representation of an executable module without an assigned target. This is useful for hand-authoring executables prior to device specification.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::HAL::ExecutableSourceEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>objects</code>::mlir::iree_compiler::IREE::HAL::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant_end-halexecutablevariantendop","title":"<code>hal.executable.variant_end</code> (HAL::ExecutableVariantEndOp)","text":"<p>Terminator pseudo-op for the executable variant op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.variant_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableVariantOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant-halexecutablevariantop","title":"<code>hal.executable.variant</code> (HAL::ExecutableVariantOp)","text":"<p>Target-specific variant of an executable op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.variant` 
custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              `target` `(` $target `)`\n              (`objects` `(` $objects^ `)` )?\n              attr-dict-with-keyword\n              $body\n</code></pre> <p>The target IR for the executable. This can be preserved for debugging but is usually removed during transformation.</p> <p>Variants are selected based on their target and an optional condition op that returns true if the variant is valid for use on the provided runtime <code>!hal.device</code>. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableOp&gt;</code>, <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::HAL::ExecutableVariantEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>target</code>::mlir::iree_compiler::IREE::HAL::ExecutableTargetAttrgeneric executable target specification <code>objects</code>::mlir::ArrayAttrHAL executable object references"},{"location":"reference/mlir-dialects/HAL/#experimental-ops","title":"Experimental ops","text":"<p>Temporary hack ops expected to be removed in the future.</p>"},{"location":"reference/mlir-dialects/HAL/#halexfilefrom_memory-halexfilefrommemoryop","title":"<code>hal.ex.file.from_memory</code> (HAL::ExFileFromMemoryOp)","text":"<p>Creates a file mapped into a byte range of a host buffer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.ex.file.from_memory` `device` `(` $device `:` type($device) `)`\n              `affinity` `(` $queue_affinity `)`\n              `access` `(` $access 
`)`\n              `buffer` `(` $buffer `:` type($buffer) `)`\n              `` `[` $offset `for` $length `]`\n              `flags` `(` $flags `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a file handle that is backed by the given <code>buffer</code> contents. Behavior is undefined if the buffer contents change while the accesses are in-flight.</p> <p>Experimental as the exact interface for getting files from module contents still needs iteration. Most hardware APIs require a file descriptor or native platform handle but here we only have host pointers. When memory-mapped some systems allow for retrieval of the platform handle from a virtual address (GetMappedFileNameA/posix_mem_offset) but the APIs are sketchy and likely slow. Instead we should probably have a way to query for a file handle derived from the calling module by stack-walking and asking the VM module for its handle. Until we can figure this out this method will be marked experimental.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>access</code>mlir::iree_compiler::IREE::HAL::MemoryAccessBitfieldAttrvalid MemoryAccess"},{"location":"reference/mlir-dialects/HAL/#operands_47","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>buffer</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index <code>flags</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_28","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#fence-ops","title":"Fence ops","text":"<p>Ops for <code>!hal.fence</code> / 
<code>iree_hal_fence_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halfenceawait-halfenceawaitop","title":"<code>hal.fence.await</code> (HAL::FenceAwaitOp)","text":"<p>Asynchronous fence wait operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.await` `until` `(` `[` $fences `]` `)`\n              `timeout_millis` `(` $timeout_millis `)`\n              `:` type($status)\n              attr-dict-with-keyword\n</code></pre> <p>Yields the caller until all fences are reached. Returns the <code>status</code> of the fence after the wait, with a non-zero value indicating failure.</p> <p>Traits: <code>Util_YieldPoint</code></p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_48","title":"Operands:","text":"Operand Description <code>timeout_millis</code> 32-bit signless integer <code>fences</code> variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_29","title":"Results:","text":"Result Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencecreate-halfencecreateop","title":"<code>hal.fence.create</code> (HAL::FenceCreateOp)","text":"<p>Creates an unsignaled fence</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.create` `device` `(` $device `:` type($device) `)`\n              `flags` `(` $flags `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a fence that defines a point in time. 
By default fences will remain unsignaled unless they are explicitly signaled with <code>hal.fence.signal</code> or asynchronously signaled by the device by passing them as an operand to queue submission ops.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>mlir::iree_compiler::IREE::HAL::FenceFlagBitfieldAttrvalid FenceFlag"},{"location":"reference/mlir-dialects/HAL/#operands_49","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_30","title":"Results:","text":"Result Description <code>result</code> fence"},{"location":"reference/mlir-dialects/HAL/#halfencefail-halfencefailop","title":"<code>hal.fence.fail</code> (HAL::FenceFailOp)","text":"<p>Fence failure operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.fail` `&lt;` $fence `:` type($fence) `&gt;`\n              `status` `(` $status `)`\n              attr-dict-with-keyword\n</code></pre> <p>Signals the fence with a failure. 
The <code>status</code> will be returned from each timepoint semaphores <code>hal.semaphore.query</code> and <code>hal.semaphore.signal</code> for the lifetime of each semaphore.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_50","title":"Operands:","text":"Operand Description <code>fence</code> fence <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencejoin-halfencejoinop","title":"<code>hal.fence.join</code> (HAL::FenceJoinOp)","text":"<p>Creates a fence from the given timepoints</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.join` `at` `(` `[` $fences `]` `)`\n              `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a fence that joins the input fences as a wait-all operation.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_51","title":"Operands:","text":"Operand Description <code>fences</code> variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_31","title":"Results:","text":"Result Description <code>result</code> fence"},{"location":"reference/mlir-dialects/HAL/#halfencequery-halfencequeryop","title":"<code>hal.fence.query</code> (HAL::FenceQueryOp)","text":"<p>Fence query operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.query` `&lt;` $fence `:` type($fence) `&gt;`\n              `:` type($status)\n              attr-dict-with-keyword\n</code></pre> <p>Queries whether the fence has been reached and its status. 
Returns OK if the fence has been signaled successfully, DEFERRED if it is unsignaled, and otherwise an error indicating the failure.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_52","title":"Operands:","text":"Operand Description <code>fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#results_32","title":"Results:","text":"Result Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencesignal-halfencesignalop","title":"<code>hal.fence.signal</code> (HAL::FenceSignalOp)","text":"<p>Fence signal operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.signal` `&lt;` $fence `:` type($fence) `&gt;`\n              attr-dict-with-keyword\n</code></pre> <p>Signals the fence to indicate that the timepoints contained have been reached. Waiting work may begin immediately.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_53","title":"Operands:","text":"Operand Description <code>fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#instrument-ops","title":"Instrument ops","text":"<p>Ops for <code>!hal.instrument.*</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemoryload-halinstrumentmemoryloadop","title":"<code>hal.instrument.memory.load</code> (HAL::InstrumentMemoryLoadOp)","text":"<p>Emits a memory load instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.memory.load` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $base `[` $indices `]` `,` $loadValue\n              attr-dict `:` type($base) `,` type($result)\n</code></pre> <p>Emits a workgroup-specific memory load event indicating that a number of bytes from the given resolved pointer have been loaded by the workgroup.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_54","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>loadValue</code> any type <code>base</code> memref of any type values <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_33","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemorystore-halinstrumentmemorystoreop","title":"<code>hal.instrument.memory.store</code> (HAL::InstrumentMemoryStoreOp)","text":"<p>Emits a memory store instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.memory.store` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $base `[` $indices `]` `,` $storeValue\n              attr-dict `:` type($base) `,` type($result)\n</code></pre> <p>Emits a workgroup-specific memory store event indicating that a number of bytes have been stored to the given resolved pointer by the workgroup.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_55","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>storeValue</code> any type <code>base</code> memref of any type values <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_34","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentprint-halinstrumentprintop","title":"<code>hal.instrument.print</code> (HAL::InstrumentPrintOp)","text":"<p>Emits a human-readable printf-style string event</p> <p>Syntax:</p> 
<pre><code>operation ::= `hal.instrument.print` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $format (`*` `(` $values^ `:` type($values) `)`)?\n              attr-dict\n</code></pre> <p>Formats a string using a limited subset of printf format specifiers and the provided values and then emits an <code>iree_instrument_dispatch_print_t</code> event. Final formatted string lengths may be limited to as much as 1024 characters and should be kept as small as possible to avoid easily exceeding the instrumentation storage buffers with redundant strings.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>format</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_56","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>values</code> variadic of any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentvalue-halinstrumentvalueop","title":"<code>hal.instrument.value</code> (HAL::InstrumentValueOp)","text":"<p>Emits a scalar value instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.value` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $ordinal `=` $operand attr-dict `:` type($operand)\n</code></pre> <p>Emits a workgroup-specific typed value with the given workgroup-relative ordinal.</p> <p>This op will be preserved even if the output is not used as it is only for debugging purposes.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>ordinal</code>::mlir::IntegerAttr8-bit integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_57","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>operand</code> any 
type"},{"location":"reference/mlir-dialects/HAL/#results_35","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentworkgroup-halinstrumentworkgroupop","title":"<code>hal.instrument.workgroup</code> (HAL::InstrumentWorkgroupOp)","text":"<p>Emits a dispatch workgroup instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.workgroup` `` `[` $buffer `:` type($buffer) `]`\n              `dispatch` `(` $dispatchId `)`\n              attr-dict `:` type($workgroupKey)\n</code></pre> <p>Emits an <code>iree_instrument_dispatch_workgroup_t</code> event into the instrumentation stream. The workgroup event identifies the unique dispatch, its workgroup count, and the ID of the emitting workgroup within the dispatch. Optionally targets that support querying the processor ID executing the workgroup can attach that information for tracking purposes.</p> <p>On targets such as CPUs where entire workgroups execute as atomic units only one workgroup event should be emitted. 
On targets such as GPUs where there may be multiple invocations executing as part of a single workgroup only the first invocation within the workgroup should emit the workgroup event (by checking if the LocalInvocationIndex or threadIdx == 0, etc).</p> <p>The resulting workgroup key is used by subsequent workgroup-specific instrumentation events.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_58","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>dispatchId</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_36","title":"Results:","text":"Result Description <code>workgroupKey</code> index"},{"location":"reference/mlir-dialects/HAL/#interface-ops","title":"Interface ops","text":"<p>Ops for <code>!hal.interface.*</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halinterfacebindingsubspan-halinterfacebindingsubspanop","title":"<code>hal.interface.binding.subspan</code> (HAL::InterfaceBindingSubspanOp)","text":"<p>Returns an alias to a subspan of interface binding data</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.binding.subspan` `set` `(` $set `)`\n              `binding` `(` $binding `)`\n              `type` `(` custom&lt;DescriptorType&gt;($descriptor_type) `)`\n              (`alignment` `(` $alignment^ `)`)?\n              (`offset` `(` $byte_offset^ `)`)?\n              (`flags` `(` $descriptor_flags^ `)`)?\n              attr-dict `:` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Returns a subspan of an interface binding storage buffer in a generic type. The exact shape, type, and alignment of the returned type are defined by the result type (tensor, memref, etc).</p> <p>An optional alignment indicates the byte alignment of the base binding resource. 
Note that the byte offset is added to the base and the alignment will be the minimum of the two.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>set</code>::mlir::IntegerAttrindex attribute <code>binding</code>::mlir::IntegerAttrindex attribute <code>descriptor_type</code>::mlir::iree_compiler::IREE::HAL::DescriptorTypeAttrvalid DescriptorType <code>alignment</code>::mlir::IntegerAttrindex attribute <code>descriptor_flags</code>::mlir::iree_compiler::IREE::HAL::DescriptorFlagsAttrvalid Descriptor flags"},{"location":"reference/mlir-dialects/HAL/#operands_59","title":"Operands:","text":"Operand Description <code>byte_offset</code> index <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_37","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceconstantload-halinterfaceconstantloadop","title":"<code>hal.interface.constant.load</code> (HAL::InterfaceConstantLoadOp)","text":"<p>Loads a constant value from the interface constant block</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.constant.load` `` `[` $index `]`\n              (`alignment` `(` $alignment^ `)`)?\n              (`values` `(` $values^ `)`)?\n              attr-dict `:` type($result)\n</code></pre> <p>Loads a scalar constant value from an executable IO push constant block. 
The value will be loaded from the given constant offset and will be bitcast (possibly with truncation or zero-extension) to the result type.</p> <p>An optional alignment indicates the byte alignment of potential values for the constant when it could be determined from analysis. If omitted the value may be anything and its interpretation is up to the usage. This is intended to provide pointer alignment-like semantics to constants that are used to index into binding resources.</p> <p>An optional set of values indicates all possible values that can be passed to the constant from all dispatch sites in the program. If omitted the value may be from an unanalyzable source (outside of the program, indirect, etc) and must be assumed to have any value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrsize_t <code>alignment</code>::mlir::IntegerAttrindex attribute <code>values</code>::mlir::ArrayAttrarray attribute"},{"location":"reference/mlir-dialects/HAL/#results_38","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupcount-halinterfaceworkgroupcountop","title":"<code>hal.interface.workgroup.count</code> (HAL::InterfaceWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid. 
Matches what was passed to the <code>hal.command_buffer.dispatch</code> command (or what was indirectly specified).</p> <p>Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable.</p> <pre><code>%x = hal.interface.workgroup.count[0] : index\n%y = hal.interface.workgroup.count[1] : index\n%z = hal.interface.workgroup.count[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_39","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupid-halinterfaceworkgroupidop","title":"<code>hal.interface.workgroup.id</code> (HAL::InterfaceWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current tile in the range of <code>[0, hal.interface.workgroup.count)</code> along each XYZ dimension.</p> <p>Corresponds to the <code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable.</p> <pre><code>%x = hal.interface.workgroup.id[0] : index\n%y = hal.interface.workgroup.id[1] : index\n%z = hal.interface.workgroup.id[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_40","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupsize-halinterfaceworkgroupsizeop","title":"<code>hal.interface.workgroup.size</code> (HAL::InterfaceWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable.</p> <pre><code>%x = hal.interface.workgroup.size[0] : index\n%y = hal.interface.workgroup.size[1] : index\n%z = hal.interface.workgroup.size[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_41","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#pipeline-layout-ops","title":"Pipeline layout ops","text":"<p>Ops for <code>!hal.pipeline_layout</code> / 
<code>iree_hal_pipeline_layout_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutcreate-halpipelinelayoutcreateop","title":"<code>hal.pipeline_layout.create</code> (HAL::PipelineLayoutCreateOp)","text":"<p>Creates a pipeline layout</p> <p>Syntax:</p> <pre><code>operation ::= `hal.pipeline_layout.create` `device` `(` $device `:` type($device) `)`\n              `push_constants` `(` $push_constants `)`\n              `layouts` `(` `[` $set_layouts `]` `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a pipeline layout from the given descriptor sets and push constant required size. Pipeline layouts can be shared across any executable that uses the same layout and push constant information. Sharing the layout between executables will reduce runtime binding overhead and it is often worth the cost to allow a small number of unused bindings in one executable such that it can share layouts with others that will be scheduled adjacent to it.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>push_constants</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_60","title":"Operands:","text":"Operand Description <code>device</code> device <code>set_layouts</code> variadic of descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#results_42","title":"Results:","text":"Result Description <code>result</code> pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutlookup-halpipelinelayoutlookupop","title":"<code>hal.pipeline_layout.lookup</code> (HAL::PipelineLayoutLookupOp)","text":"<p>Pipeline layout 
cache lookup pseudo-op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.pipeline_layout.lookup` `device` `(` $device `:` type($device) `)`\n              `layout` `(` $layout `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized pipeline layout.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>layout</code>::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification"},{"location":"reference/mlir-dialects/HAL/#operands_61","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_43","title":"Results:","text":"Result Description <code>result</code> pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#pseudo-ops","title":"Pseudo Ops","text":"<p>Pseudo ops for conversion support.</p>"},{"location":"reference/mlir-dialects/HAL/#haldispatchextern-haldispatchexternop","title":"<code>hal.dispatch.extern</code> (HAL::DispatchExternOp)","text":"<p>A dispatch of workgroups across a 3-dimensional grid</p> <p>Syntax:</p> <pre><code>operation ::= `hal.dispatch.extern` $export\n              (`[` $workload^ `]`)? 
``\n              `(` $arguments `)` `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n              `count` `` custom&lt;WorkgroupCountRegion&gt;($workgroup_count)\n              `layout` `(` $layout `)`\n              (`bindings` `(` $bindings^ `)`)?\n              `objects` `(` `{` custom&lt;TargetConditionObjects&gt;($targets,\n              $target_ordinals,\n              $target_objects,\n              $target_regions) `}` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches some number of workgroups across a 3-dimensional grid using a function defined externally in one or more referenced objects. Objects are declared per executable target and selected automatically during linking based on where the dispatch is used. Semantically this is equivalent to a <code>flow.dispatch.workgroups</code> but with the workgroup region invisible to the compiler. See <code>hal.executable</code> for more information about object linkage.</p> <p>Note that since this happens at tensor level the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors. 
Codegen guarantees this behavior but when sourcing externally authored dispatch functions it's critical that this behavior is observed.</p> <p>Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the platform-dependent primitives for accessing workgroup ID, size, and count intrinsics instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc). To aid deduplication code producing these external dispatches should try not to specialize early for particular shapes and instead emit the most generic code possible as having 500 slightly different <code>hal.dispatch.extern</code> ops pointing at the same object file is likely to require 500 copies of the object instead of 500 calls to the same object.</p> <p>Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it. If workgroup size or distribution varies based on the target device a <code>!hal.device</code> argument can be used by the workgroup count calculation region to factor in device parameters. 
See <code>hal.device.query</code> for more information on how to query information.</p> <pre><code>%r = hal.dispatch.extern \"some_function\"[%c5, %c5](%0, %1)\n    : (tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt;\n  ...\n</code></pre> <p>The number of results of the operation is equal to the number of results in the type signature (<code>(tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt;</code>). Each tensor argument and result in the type signature has a corresponding pipeline layout slot and must be declared. If multiple arguments or results share the same layout slot they can be aliased using the <code>bindings</code> attribute and otherwise each is assumed unique.</p> <p>There are no <code>arguments</code> operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, <code>-&gt; %0</code> would tie the first argument to the result. In that case, there would be no separate block argument for the result.</p> <p>Objects for multiple targets can be specified and the ones used are selected based on their target and an optional condition region that returns true if the variant is valid for use on the provided runtime <code>!hal.device</code>. If no variants within an executable are valid then loading will fail at runtime. 
If multiple variants are valid the first valid one found will be loaded and used for execution.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>export</code>::mlir::StringAttrstring attribute <code>layout</code>::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification <code>targets</code>::mlir::ArrayAttrarray attribute <code>target_ordinals</code>::mlir::ArrayAttrArray of index ordinal attributes <code>target_objects</code>::mlir::ArrayAttrarray attribute <code>workgroup_size</code>::mlir::ArrayAttrindex array attribute <code>subgroup_size</code>::mlir::IntegerAttrsize_t <code>workgroup_local_memory</code>::mlir::IntegerAttrindex attribute <code>bindings</code>::mlir::ArrayAttrHAL binding array attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_62","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_44","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/HAL/#haltensorbarrier-haltensorbarrierop","title":"<code>hal.tensor.barrier</code> (HAL::TensorBarrierOp)","text":"<p>Signals a fence when all tensors are available</p> <p>Syntax:</p> <pre><code>operation ::= `hal.tensor.barrier` `join` `` `(` $sources `:` type($sources) 
`)`\n              `=` `` `&gt;`\n              $signal_fence `:` type($signal_fence)\n              attr-dict-with-keyword\n</code></pre> <p>Defines a barrier that is used to indicate availability of an entire set of tensors by signaling a fence. The source tensors are returned for chaining.</p> <p>Interfaces: <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_63","title":"Operands:","text":"Operand Description <code>sources</code> variadic of tensor of any type values <code>signal_fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#results_45","title":"Results:","text":"Result Description <code>results</code> variadic of tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#haltensorexport-haltensorexportop","title":"<code>hal.tensor.export</code> (HAL::TensorExportOp)","text":"<p>Exports a tensor to a HAL buffer view</p> <p>Syntax:</p> <pre><code>operation ::= `hal.tensor.export` $source\n              ($name^)?\n              (`into` `(` $target_storage^ `:` type($target_storage) `)`)?\n              `:`\n              custom&lt;TypeAlias&gt;($source_encoding, type($source)) (`{` $source_dims^ `}`)?\n              `-&gt;`\n              type($target)\n              attr-dict\n</code></pre> <p>Defines an export of an SSA-form tensor to an external HAL buffer view.</p> <p>The provided <code>source_encoding</code>, if different from the <code>source</code> type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.</p> <p>An optional <code>target_storage</code> buffer can be provided to hold the exported result. The export will fail at runtime if the storage is null or if it has insufficient capacity to store the output. 
The storage must be device-visible and defined for transfer-target and dispatch usage.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_64","title":"Operands:","text":"Operand Description <code>source</code> tensor of any type values <code>source_dims</code> variadic of index <code>target_storage</code> buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_46","title":"Results:","text":"Result Description <code>target</code> buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#haltensorimport-haltensorimportop","title":"<code>hal.tensor.import</code> (HAL::TensorImportOp)","text":"<p>Imports a tensor from a HAL buffer view</p> <p>Syntax:</p> <pre><code>operation ::= `hal.tensor.import` (`wait` `(` $wait_fence^ `)` `=` `` `&gt;`)?\n              $source\n              ($name^)?\n              `:` type($source) `-&gt;`\n              custom&lt;TypeAlias&gt;($target_encoding, type($target)) (`{` $target_dims^ `}`)?\n              attr-dict\n</code></pre> <p>Defines an import of an external HAL buffer view into an SSA-form tensor. An optional semaphore timepoint can be specified indicating when the buffer view is available for use. 
If no semaphore timepoint is provided it is assumed the buffer view is immediately available.</p> <p>The provided <code>target_encoding</code>, if different from the <code>target</code> type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_48","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute <code>name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_65","title":"Operands:","text":"Operand Description <code>source</code> buffer or buffer_view <code>target_dims</code> variadic of index <code>wait_fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#results_47","title":"Results:","text":"Result Description <code>target</code> tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#attributes_49","title":"Attributes","text":""},{"location":"reference/mlir-dialects/HAL/#affinityqueueattr","title":"AffinityQueueAttr","text":"<p>specifies a set of allowed queues for an operation</p> <p>WIP; see #10765. This may change in the future to either be a nested attribute on a larger affinity struct or be defined by an implementation of the affinity attr interface. 
For now this allows higher levels of the stack to specify queues such that the stream dialect can understand them and they can be lowered into the HAL dialect.</p> <p>Specifies that an annotated operation or scope is only allowed to execute on the set of queues (0-64) provided. Operations will not run on other queues.</p> <p>Example: <pre><code>// any queue\n#hal.affinity.queue&lt;*&gt;\n// queues 4 and 5\n#hal.affinity.queue&lt;[4, 5]&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters","title":"Parameters:","text":"Parameter C++ type Description mask <code>int64_t</code>"},{"location":"reference/mlir-dialects/HAL/#collectiveattr","title":"CollectiveAttr","text":"<p>collective operation and specification</p> <p>Syntax:</p> <pre><code>#hal.collective&lt;\n  CollectiveKind,   # kind\n  std::optional&lt;CollectiveReductionOp&gt;,   # reduction\n  CollectiveElementType   # element_type\n&gt;\n</code></pre> <p>Specifies the collective operation to perform and any mode bits required.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description kind <code>CollectiveKind</code> reduction <code>std::optional&lt;CollectiveReductionOp&gt;</code> element_type <code>CollectiveElementType</code>"},{"location":"reference/mlir-dialects/HAL/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"<p>descriptor set binding specification</p> <p>Syntax:</p> <pre><code>#hal.descriptor_set.binding&lt;\n  int64_t,   # ordinal\n  DescriptorType,   # type\n  std::optional&lt;DescriptorFlags&gt;   # flags\n&gt;\n</code></pre> <p>Specifies a single binding within a descriptor set layout.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> type <code>DescriptorType</code> flags 
<code>std::optional&lt;DescriptorFlags&gt;</code>"},{"location":"reference/mlir-dialects/HAL/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"<p>descriptor set layout specification</p> <p>Syntax:</p> <pre><code>#hal.descriptor_set.layout&lt;\n  int64_t,   # ordinal\n  ::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;,   # bindings\n  std::optional&lt;DescriptorSetLayoutFlags&gt;   # flags\n&gt;\n</code></pre> <p>Specifies the layout information of a single set of descriptors used within a pipeline layout. Multiple of these sets may be used by a single entry point to allow for bindings with similar update frequencies to be grouped.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> bindings <code>::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;</code> flags <code>std::optional&lt;DescriptorSetLayoutFlags&gt;</code>"},{"location":"reference/mlir-dialects/HAL/#descriptortypeattr","title":"DescriptorTypeAttr","text":"<p>valid DescriptorType</p> <p>Syntax:</p> <pre><code>#hal.descriptor_type&lt;\n  ::mlir::iree_compiler::IREE::HAL::DescriptorType   # value\n&gt;\n</code></pre> <p>Enum cases: * uniform_buffer (<code>UniformBuffer</code>) * storage_buffer (<code>StorageBuffer</code>)</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description value <code>::mlir::iree_compiler::IREE::HAL::DescriptorType</code> an enum of type DescriptorType"},{"location":"reference/mlir-dialects/HAL/#devicetargetattr","title":"DeviceTargetAttr","text":"<p>generic device target specification</p> <p>Specifies the properties of a target runtime device. Target devices are specified with a canonical identifier matching those used by the runtime (such as <code>cpu</code>, <code>vulkan</code>, etc). Target devices may support several target executable formats specified with <code>#hal.executable.target</code>. 
An optional configuration dictionary allows for overriding backend defaults.</p> <p>Example: <pre><code>#hal.device.target&lt;\"llvm-cpu\", {\n  executable_targets = [\n    #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-arm_32\"&gt;,\n    #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-arm_64\"&gt;,\n  ]\n}&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description deviceID <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/HAL/#executableobjectattr","title":"ExecutableObjectAttr","text":"<p>object file reference</p> <p>Defines an object file that can be linked into executables. Today this is only supported for external file references with paths the compiler can successfully resolve from its current working directory. Inlined data can optionally be provided to avoid the need for file system access and ensure the data source is attached to the IR as it makes its way through multiple compiler stages or reproducers.</p> <p>Future revisions may change this to an interface that allows both internal and external resources to define the object contents. Linking needs to be updated to support various object compositions and certain backends may require additional infrastructure support.</p> <p>In the long term the goal is to allow combinations of declared objects and generated code in order to give control of linking behavior to frontends. 
Instead of needing global command line flags to link in additional blobs the frontend can emit executables with the dependencies already defined per variant without needing to reach into the IREE compiler code.</p> <p>Example: <pre><code>#hal.executable.object&lt;{path = \"some/file.obj\"}&gt;\n#hal.executable.object&lt;{\n  path = \"some/embedded/file.obj\",\n  data = dense&lt;[...]&gt; : vector&lt;2048xi8&gt;\n}&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description path <code>StringAttr</code> data <code>DenseIntElementsAttr</code>"},{"location":"reference/mlir-dialects/HAL/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"<p>target-specific object file references</p> <p>A dictionary mapping executable target specifications to a list of objects. This is used to allow layers of the stack that support multi-targeting to specify information used during lowering into each particular target.</p> <p>The key attributes are matched against each target variant based on the backend and format as well as any configuration data provided. When comparing the configuration only fields present in both the key and target variant will be checked and must match. 
This allows specification of generic sets (\"all x86_64 targets get these objects\") as well as specific ones (\"only x86_64 targets with vector_size = 64 get these objects\").</p> <p>Example: <pre><code>#hal.executable.objects&lt;{\n  #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-arm_64\"&gt; = [\n    #hal.executable.object&lt;{path = \"some/file_arm_64.obj\"}&gt;\n  ],\n  #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-x86_64\"&gt; = [\n    #hal.executable.object&lt;{path = \"some/file_x86_64.obj\"}&gt;\n  ]\n}&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description targets <code>ArrayAttr</code> targetObjects <code>ArrayAttr</code>"},{"location":"reference/mlir-dialects/HAL/#executabletargetattr","title":"ExecutableTargetAttr","text":"<p>generic executable target specification</p> <p>Specifies how to compile an executable for a specific target backend. A backend is used to translate and serialize the executable into the final form passed to the runtime. The format of the executable is a target-specific value indicating the required runtime support to load the deployed artifact. An optionally provided configuration dictionary overrides backend-specific defaults.</p> <p>Example: <pre><code>  // Produce a system-native ELF for x86-64 systems using the LLVM backend:\n  #hal.executable.target&lt;\"llvm-cpu\", \"system-elf-x86_64\", {\n    triple = \"x86_64-unknown-linux-elf\",\n    cpu = \"host\",\n    cpu_features = \"host\",\n    abi = \"lp32\",\n    ...\n  }&gt;\n</code></pre></p> <p>The same compilation backend may be used to translate executables for several different runtime devices. Likewise the same runtime device may use one of many different executable targets. 
Assume an N:M mapping between the two in all cases.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description backend <code>StringAttr</code> format <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/HAL/#interfacebindingattr","title":"InterfaceBindingAttr","text":"<p>interface binding specification</p> <p>Syntax:</p> <pre><code>#hal.interface.binding&lt;\n  int64_t,   # set\n  int64_t   # binding\n&gt;\n</code></pre> <p>Specifies the descriptor set and binding ordinal of a particular layout binding.</p> <p>Example: <pre><code>#hal.interface.binding&lt;0, 1&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description set <code>int64_t</code> binding <code>int64_t</code>"},{"location":"reference/mlir-dialects/HAL/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"<p>executable entry point layout specification</p> <p>Syntax:</p> <pre><code>#hal.pipeline.layout&lt;\n  int64_t,   # pushConstants\n  ::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;   # setLayouts\n&gt;\n</code></pre> <p>Specifies the layout information used for interacting with executable functions. 
This allows host code to correctly map parameters to the lower-level target-specific argument passing behavior.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_10","title":"Parameters:","text":"Parameter C++ type Description pushConstants <code>int64_t</code> setLayouts <code>::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;</code>"},{"location":"reference/mlir-dialects/HAL/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/HAL/#allocator","title":"allocator","text":"<p>Allocates buffers for a particular device memory space.</p>"},{"location":"reference/mlir-dialects/HAL/#buffer","title":"buffer","text":"<p>A memory buffer with a specific memory_type that is used to describe the capabilities and behavior of the backing memory of the buffer. Buffers may be any mix of host-accessible, host-coherent, or device-accessible for various usages. Depending on these memory types the buffers may be mapped for access on the host as memory though certain restrictions may be imposed.</p>"},{"location":"reference/mlir-dialects/HAL/#buffer_view","title":"buffer_view","text":"<p>A shaped and typed buffer reference. This just wraps an existing hal.buffer with its associated metadata to make it easier to pass across ABI boundaries. In most cases buffer views can be elided entirely by the compiler and they'll only be seen when calling external functions.</p>"},{"location":"reference/mlir-dialects/HAL/#collectivechannel","title":"collective.channel","text":"<p>Channel identifier used to allow for participation in multiple collective groups.</p>"},{"location":"reference/mlir-dialects/HAL/#command_buffer","title":"command_buffer","text":"<p>Asynchronous command buffer recording interface. 
Commands are recorded by the implementation for later submission to command queues.</p>"},{"location":"reference/mlir-dialects/HAL/#descriptor_set_layout","title":"descriptor_set_layout","text":"<p>Descriptor set layout.</p>"},{"location":"reference/mlir-dialects/HAL/#device","title":"device","text":"<p>Logical device instance.</p>"},{"location":"reference/mlir-dialects/HAL/#event","title":"event","text":"<p>Events are used for defining synchronization scopes within CommandBuffers. An event only exists within a single CommandBuffer and must not be used across CommandBuffers from the same device or others.</p>"},{"location":"reference/mlir-dialects/HAL/#executable","title":"executable","text":"<p>A prepared and ready-to-dispatch executable.</p>"},{"location":"reference/mlir-dialects/HAL/#fence","title":"fence","text":"<p>A set of semaphore timepoints defining a common point in time across multiple timelines.</p>"},{"location":"reference/mlir-dialects/HAL/#buffer_1","title":"buffer","text":"<p>A stateless file handle that can be read/written using queue-ordered transfer operations.</p>"},{"location":"reference/mlir-dialects/HAL/#pipeline_layout","title":"pipeline_layout","text":"<p>A pipeline layout describing the descriptor sets and push constants used.</p>"},{"location":"reference/mlir-dialects/HALInline/","title":"HAL/Inline","text":""},{"location":"reference/mlir-dialects/HALInline/#hal_inline-dialect","title":"'hal_inline' Dialect","text":"<p>IREE inline HAL interop runtime module dialect.</p> <p>Low-level dialect for limited in-process ABI interop with the full HAL. Only operates synchronously, single-threaded, and on host-local buffers. 
Use the full HAL for all other cases.</p> <p>This dialect can be used alongside the full HAL but is intended for use in standalone configurations or paired with the <code>hal_loader</code> dialect which also carries the same usage restrictions.</p> <p>See <code>hal_inline.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'hal_inline' Dialect<ul> <li>Operations<ul> <li>Buffer ops<ul> <li>hal_inline.buffer.allocate.initialized (HAL::Inline::BufferAllocateInitializedOp)</li> <li>hal_inline.buffer.allocate (HAL::Inline::BufferAllocateOp)</li> <li>hal_inline.buffer.length (HAL::Inline::BufferLengthOp)</li> <li>hal_inline.buffer.storage (HAL::Inline::BufferStorageOp)</li> <li>hal_inline.buffer.subspan (HAL::Inline::BufferSubspanOp)</li> <li>hal_inline.buffer.wrap (HAL::Inline::BufferWrapOp)</li> </ul> </li> <li>Buffer view ops<ul> <li>hal_inline.buffer_view.assert (HAL::Inline::BufferViewAssertOp)</li> <li>hal_inline.buffer_view.buffer (HAL::Inline::BufferViewBufferOp)</li> <li>hal_inline.buffer_view.create (HAL::Inline::BufferViewCreateOp)</li> <li>hal_inline.buffer_view.dim (HAL::Inline::BufferViewDimOp)</li> <li>hal_inline.buffer_view.element_type (HAL::Inline::BufferViewElementTypeOp)</li> <li>hal_inline.buffer_view.encoding_type (HAL::Inline::BufferViewEncodingTypeOp)</li> <li>hal_inline.buffer_view.rank (HAL::Inline::BufferViewRankOp)</li> <li>hal_inline.buffer_view.trace (HAL::Inline::BufferViewTraceOp)</li> </ul> </li> <li>Device ops<ul> <li>hal_inline.device.query (HAL::Inline::DeviceQueryOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/HALInline/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/HALInline/#buffer-ops","title":"Buffer ops","text":"<p>Ops for <code>!hal.buffer</code> / 
<code>iree_hal_buffer_t</code>.</p>"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocateinitialized-halinlinebufferallocateinitializedop","title":"<code>hal_inline.buffer.allocate.initialized</code> (HAL::Inline::BufferAllocateInitializedOp)","text":"<p>Buffer allocation with cloning</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.allocate.initialized` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n              `alignment` `(` $minimum_alignment `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), ref($length)) `in` type($storage)\n              attr-dict-with-keyword\n</code></pre> <p>Allocates a buffer with a copy of the provided contents.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands","title":"Operands:","text":"Operand Description <code>minimum_alignment</code> index <code>source</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HALInline/#results","title":"Results:","text":"Result Description <code>result</code> buffer <code>storage</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocate-halinlinebufferallocateop","title":"<code>hal_inline.buffer.allocate</code> (HAL::Inline::BufferAllocateOp)","text":"<p>Empty buffer allocation operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.allocate` `alignment` `(` $minimum_alignment `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), $allocation_size) `in` type($storage)\n              attr-dict-with-keyword\n</code></pre> <p>Allocates a buffer of the given size. 
The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_1","title":"Operands:","text":"Operand Description <code>minimum_alignment</code> index <code>allocation_size</code> index"},{"location":"reference/mlir-dialects/HALInline/#results_1","title":"Results:","text":"Result Description <code>result</code> buffer <code>storage</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferlength-halinlinebufferlengthop","title":"<code>hal_inline.buffer.length</code> (HAL::Inline::BufferLengthOp)","text":"<p>Buffer byte length accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.length` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the allocated size of a buffer in bytes. 
May be less than the underlying buffer allocation if this is a subspan or view into another buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_2","title":"Operands:","text":"Operand Description <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferstorage-halinlinebufferstorageop","title":"<code>hal_inline.buffer.storage</code> (HAL::Inline::BufferStorageOp)","text":"<p>Buffer backing storage accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.storage` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `:` type($storage)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the host backing storage of the HAL buffer as a subspan limited to the buffer's logical range (meaning that byte 0 of the returned buffer is byte 0 of the HAL buffer).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_3","title":"Operands:","text":"Operand Description <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#results_3","title":"Results:","text":"Result Description <code>storage</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffersubspan-halinlinebuffersubspanop","title":"<code>hal_inline.buffer.subspan</code> (HAL::Inline::BufferSubspanOp)","text":"<p>Buffer subspan 
operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.subspan` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a reference to a subspan of the buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_4","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HALInline/#results_4","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferwrap-halinlinebufferwrapop","title":"<code>hal_inline.buffer.wrap</code> (HAL::Inline::BufferWrapOp)","text":"<p>Host buffer wrapping operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.wrap` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Tries wrapping a !hal.buffer around host memory backed by the given byte buffer.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_5","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HALInline/#results_5","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#buffer-view-ops","title":"Buffer 
view ops","text":"<p>Ops for <code>!hal.buffer_view</code> / <code>iree_hal_buffer_view_t</code>.</p>"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewassert-halinlinebufferviewassertop","title":"<code>hal_inline.buffer_view.assert</code> (HAL::Inline::BufferViewAssertOp)","text":"<p>Buffer view contents assertion</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.assert` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `message` `(` $message `)`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              attr-dict-with-keyword\n</code></pre> <p>Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if <code>std.assert</code> had been used.</p>"},{"location":"reference/mlir-dialects/HALInline/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_6","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewbuffer-halinlinebufferviewbufferop","title":"<code>hal_inline.buffer_view.buffer</code> (HAL::Inline::BufferViewBufferOp)","text":"<p>Buffer view buffer accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.buffer` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the buffer backing this view's contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_7","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_6","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewcreate-halinlinebufferviewcreateop","title":"<code>hal_inline.buffer_view.create</code> (HAL::Inline::BufferViewCreateOp)","text":"<p>Buffer view reference initializer</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `,` $source_length `]`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. 
This makes it easier to associate commonly-carried metadata along with the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_8","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>source_length</code> index <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HALInline/#results_7","title":"Results:","text":"Result Description <code>result</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewdim-halinlinebufferviewdimop","title":"<code>hal_inline.buffer_view.dim</code> (HAL::Inline::BufferViewDimOp)","text":"<p>Buffer view dimension value query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.dim` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `` `[` $index `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the value of the given dimension.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_9","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_8","title":"Results:","text":"Result Description 
<code>result</code> index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewelement_type-halinlinebufferviewelementtypeop","title":"<code>hal_inline.buffer_view.element_type</code> (HAL::Inline::BufferViewElementTypeOp)","text":"<p>Buffer view element type query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.element_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_10","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_9","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewencoding_type-halinlinebufferviewencodingtypeop","title":"<code>hal_inline.buffer_view.encoding_type</code> (HAL::Inline::BufferViewEncodingTypeOp)","text":"<p>Buffer view encoding type query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.encoding_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the encoding type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_11","title":"Operands:","text":"Operand Description <code>buffer_view</code> 
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_10","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewrank-halinlinebufferviewrankop","title":"<code>hal_inline.buffer_view.rank</code> (HAL::Inline::BufferViewRankOp)","text":"<p>Buffer view rank query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.rank` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_12","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_11","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewtrace-halinlinebufferviewtraceop","title":"<code>hal_inline.buffer_view.trace</code> (HAL::Inline::BufferViewTraceOp)","text":"<p>Trace value(s) operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.trace` $key `=`\n              $operands `:` type($operands)\n              attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. 
The key is informational only and useful for titling/marking specific sets of buffers for easier searching.</p>"},{"location":"reference/mlir-dialects/HALInline/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_13","title":"Operands:","text":"Operand Description <code>operands</code> variadic of buffer_view"},{"location":"reference/mlir-dialects/HALInline/#device-ops","title":"Device ops","text":"<p>Ops for <code>!hal.device</code> / <code>iree_hal_device_t</code>.</p>"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinedevicequery-halinlinedevicequeryop","title":"<code>hal_inline.device.query</code> (HAL::Inline::DeviceQueryOp)","text":"<p>Returns a runtime configuration parameter from the device</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.device.query` `key` `(` $category `:` `` `:` $key `)`\n              `:` type($ok) `,` type($value)\n              (`=` $default_value^)?\n              attr-dict-with-keyword\n</code></pre> <p>Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and if it was the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.</p> <p>This is roughly equivalent to the <code>sysconf</code> linux syscall (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent.</p> <p>Users of the op must check the <code>ok</code> result before using the value as what set of keys is available may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. 
Keys should be prefixed with <code>ex.</code> when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.</p> <p>Well-known keys: (none yet)</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>category</code>::mlir::StringAttrstring attribute <code>key</code>::mlir::StringAttrstring attribute <code>default_value</code>::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/HALInline/#results_12","title":"Results:","text":"Result Description <code>ok</code> 1-bit signless integer <code>value</code> any type"},{"location":"reference/mlir-dialects/HALLoader/","title":"HAL/Loader","text":""},{"location":"reference/mlir-dialects/HALLoader/#hal_loader-dialect","title":"'hal_loader' Dialect","text":"<p>IREE HAL inline executable loader runtime module dialect.</p> <p>Low-level dialect for dynamically loading executables and dispatching work. Only operates synchronously, single-threaded, and on host-local buffers. 
Use the full HAL for all other cases.</p> <p>This dialect can be used alongside the full HAL but is intended for use in conjunction with the <code>hal_inline</code> dialect which also carries the same usage restrictions.</p> <p>See <code>hal_loader.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'hal_loader' Dialect<ul> <li>Operations<ul> <li>Executable ops<ul> <li>hal_loader.executable.dispatch (HAL::Loader::ExecutableDispatchOp)</li> <li>hal_loader.executable.dispatch.symbol (HAL::Loader::ExecutableDispatchSymbolOp)</li> <li>hal_loader.executable.load (HAL::Loader::ExecutableLoadOp)</li> <li>hal_loader.executable.lookup (HAL::Loader::ExecutableLookupOp)</li> <li>hal_loader.executable.query_support (HAL::Loader::ExecutableQuerySupportOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/HALLoader/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/HALLoader/#executable-ops","title":"Executable ops","text":"<p>Ops for <code>!hal.executable</code> / <code>iree_hal_executable_t</code>.</p>"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatch-halloaderexecutabledispatchop","title":"<code>hal_loader.executable.dispatch</code> (HAL::Loader::ExecutableDispatchOp)","text":"<p>Inline executable dispatch operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.dispatch` `executable` `(` $executable `:` type($executable) `)`\n              `` `[` $entry_point `]`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              (`constants` `(` `[` $push_constants^ `]` `)`)?\n              `bindings` `(` `[`\n              custom&lt;DispatchBindings&gt;($binding_buffers,\n              type($binding_buffers),\n              $binding_offsets,\n              $binding_lengths)\n              `]` `)`\n              
attr-dict-with-keyword\n</code></pre> <p>Dispatches execution to an executable entry point with the given parameters.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HALLoader/#operands","title":"Operands:","text":"Operand Description <code>executable</code> executable <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index <code>push_constants</code> variadic of 32-bit signless integer <code>binding_buffers</code> variadic of a reference counted byte buffer <code>binding_offsets</code> variadic of index <code>binding_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatchsymbol-halloaderexecutabledispatchsymbolop","title":"<code>hal_loader.executable.dispatch.symbol</code> (HAL::Loader::ExecutableDispatchSymbolOp)","text":"<p>Inline executable dispatch operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.dispatch.symbol` `executable` `(` $executable `:` type($executable) `)`\n              `target` `(` $entry_point `)`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              (`constants` `(` `[` $push_constants^ `]` `)`)?\n              `bindings` `(` `[`\n              custom&lt;DispatchBindings&gt;($binding_buffers,\n              type($binding_buffers),\n              $binding_offsets,\n              $binding_lengths)\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches execution to an executable entry point with the given parameters. 
The entry point is a symbolic reference to an exported entry point.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>SymbolUserOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_1","title":"Operands:","text":"Operand Description <code>executable</code> executable <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index <code>push_constants</code> variadic of 32-bit signless integer <code>binding_buffers</code> variadic of a reference counted byte buffer <code>binding_offsets</code> variadic of index <code>binding_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutableload-halloaderexecutableloadop","title":"<code>hal_loader.executable.load</code> (HAL::Loader::ExecutableLoadOp)","text":"<p>Dynamically loads an executable</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.load` `format` `(` $format `)`\n              `data` `(` $data `)`\n              (`constants` `(` `[` $constants^ `]` `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates, loads, and dynamically links an executable.</p> <p>Optional constants provide for specialization of the executable based on runtime-derived parameters.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>format</code>::mlir::StringAttrstring 
attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_2","title":"Operands:","text":"Operand Description <code>data</code> a reference counted byte buffer <code>constants</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HALLoader/#results","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablelookup-halloaderexecutablelookupop","title":"<code>hal_loader.executable.lookup</code> (HAL::Loader::ExecutableLookupOp)","text":"<p>Executable cache lookup pseudo-op</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.lookup` `executable` `(` $executable `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_1","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablequery_support-halloaderexecutablequerysupportop","title":"<code>hal_loader.executable.query_support</code> (HAL::Loader::ExecutableQuerySupportOp)","text":"<p>Queries whether an executable format is supported</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.query_support` `format` `(` $executable_format `)`\n              `:` type($supported)\n              
attr-dict-with-keyword\n</code></pre> <p>Returns true if the given format is supported by the device loader. This does not guarantee that loading will succeed as the executable may require functionality that cannot be met by the hosting runtime environment.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable_format</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_2","title":"Results:","text":"Result Description <code>supported</code> 1-bit signless integer"},{"location":"reference/mlir-dialects/IOParameters/","title":"IO/Parameters","text":""},{"location":"reference/mlir-dialects/IOParameters/#io_parameters-dialect","title":"'io_parameters' Dialect","text":"<p>External parameter resource management APIs.</p> <p>Parameters are externalized storage for resources that are asynchronously accessible and device-aware. Parameters can be read or written on the same device timelines as the operations that consume or produce them and with locality pinning to ensure memory doesn't need to move. 
Parameters are referenced by a scope and a key, with the scope being optional but strongly recommended as a way to distinguish sets of parameters that may exist when multiple model parts are compiled together and would otherwise collide.</p> <p>Parameters are provided by a few operations implementing a virtual interface and can support shared parameters (same storage used in multiple contexts, or outliving a single instantiation in a context), in-memory caches, memory-mapped files (including directly using the mapped memory for execution when devices support it), <code>iree_hal_file_t</code> usage for device-supported I/O, and parameter subsetting for things like runtime sharding.</p> <p>Alongside read(+load) and write operations, gather and scatter allow for batching of large numbers of reads and writes into/from single buffers. For parameter providers that can batch operations this allows for a handful (~1-4) of calls out to perform many more operations (~thousands). Modeling the gather/scatter also gives us a point where we could extract the mapping and use it to repack files/defrag memory in the future.</p> <p>See <code>io_parameters.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'io_parameters' Dialect<ul> <li>Operations<ul> <li>Parameter I/O ops<ul> <li>io_parameters.gather (IO::Parameters::GatherOp)</li> <li>io_parameters.load (IO::Parameters::LoadOp)</li> <li>io_parameters.scatter (IO::Parameters::ScatterOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/IOParameters/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/IOParameters/#parameter-io-ops","title":"Parameter I/O ops","text":"<p>Ops for parameter I/O.</p>"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersgather-ioparametersgatherop","title":"<code>io_parameters.gather</code> (IO::Parameters::GatherOp)","text":"<p>Gathers multiple parameters from a parameter scope</p> <p>Syntax:</p> 
<pre><code>operation ::= `io_parameters.gather` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `{`\n              custom&lt;ParameterGatherOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              $target_buffer, type($target_buffer), $target_offsets, $target_lengths)\n              `}`\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously gathers one or more parameters into a single target buffer. This is equivalent to one read per parameter but allows implementations that can batch operations to do so without additional overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/IOParameters/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_offsets</code> variadic of 64-bit signless integer <code>target_buffer</code> buffer <code>target_offsets</code> variadic of index <code>target_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersload-ioparametersloadop","title":"<code>io_parameters.load</code> (IO::Parameters::LoadOp)","text":"<p>Reads one or more parameters from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `io_parameters.load` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `type` `(` $memory_types `)`\n        
      `usage` `(` $buffer_usage `)`\n              `{`\n              custom&lt;ParameterLoadOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              type($results), $lengths)\n              `}`\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads one or more parameters from an external parameter provider and returns the resulting buffers. Depending on the parameter and buffer types this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if an allocate + read had been used.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/IOParameters/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/IOParameters/#operands_1","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_offsets</code> variadic of 64-bit signless integer <code>lengths</code> variadic of index"},{"location":"reference/mlir-dialects/IOParameters/#results","title":"Results:","text":"Result Description <code>results</code> variadic of buffer"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersscatter-ioparametersscatterop","title":"<code>io_parameters.scatter</code> (IO::Parameters::ScatterOp)","text":"<p>Scatters multiple parameters to a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `io_parameters.scatter` `&lt;` $device `:` type($device) `&gt;`\n              
`affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `{`\n              custom&lt;ParameterScatterOperations&gt;(\n              $source_buffer, type($source_buffer), $source_offsets, $source_lengths,\n              $target_scope, $target_keys, $target_offsets)\n              `}`\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously scatters one or more parameters from a single source buffer into one or more parameters. This is equivalent to one write per parameter but allows implementations that can batch operations to do so without additional overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/IOParameters/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_scope</code>::mlir::StringAttrstring attribute <code>target_keys</code>::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands_2","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_buffer</code> buffer <code>source_offsets</code> variadic of index <code>source_lengths</code> variadic of index <code>target_offsets</code> variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/IREEInput/","title":"IREEInput","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_input-dialect","title":"'iree_input' Dialect","text":"<p>Public ops/type/attributes legal for input to IREE's compiler.</p> <p>IREE's compiler allows as input a number of common dialects. This dialect contains structural and unique ops that do not exist elsewhere or that IREE has an interest in maintaining as a stable set.</p> <p>The contents of this dialect often mirror various constructs in IREE's internal implementation. 
The focus here is on simplicity and stability over time. Generally, this dialect does not use \"advanced\" features and should be broadly source compatible over a range of LLVM versions. There are, of course, limits, and source-compatibility is not guaranteed, since LLVM/MLIR's API surface is itself unstable.</p> <ul> <li>'iree_input' Dialect<ul> <li>Operations<ul> <li>Buffer and buffer view ops<ul> <li>iree_input.buffer.subspan (Input::BufferSubspanOp)</li> <li>iree_input.buffer_view.create (Input::BufferViewCreateOp)</li> <li>iree_input.buffer_view.dim (Input::BufferViewDimOp)</li> <li>iree_input.buffer_view.rank (Input::BufferViewRankOp)</li> </ul> </li> <li>Byte buffer ops<ul> <li>iree_input.byte_buffer.constant (Input::ByteBufferConstantOp)</li> </ul> </li> <li>Compiler hint ops<ul> <li>iree_input.optimization_barrier (Input::OptimizationBarrierOp)</li> </ul> </li> <li>Dispatch ops<ul> <li>iree_input.dispatch (Input::DispatchOp)</li> </ul> </li> <li>Executable source ops<ul> <li>iree_input.executable.export (Input::ExecutableExportOp)</li> <li>iree_input.executable.source_end (Input::ExecutableSourceEndOp)</li> <li>iree_input.executable.source (Input::ExecutableSourceOp)</li> </ul> </li> <li>Global variable ops<ul> <li>iree_input.global.address (Input::GlobalAddressOp)</li> <li>iree_input.global.load.indirect (Input::GlobalLoadIndirectOp)</li> <li>iree_input.global.load (Input::GlobalLoadOp)</li> <li>iree_input.global (Input::GlobalOp)</li> <li>iree_input.global.store.indirect (Input::GlobalStoreIndirectOp)</li> <li>iree_input.global.store (Input::GlobalStoreOp)</li> </ul> </li> <li>Mutable list ops<ul> <li>iree_input.list.create (Input::ListCreateOp)</li> <li>iree_input.list.get (Input::ListGetOp)</li> <li>iree_input.list.resize (Input::ListResizeOp)</li> <li>iree_input.list.set (Input::ListSetOp)</li> <li>iree_input.list.size (Input::ListSizeOp)</li> </ul> </li> <li>Pseudo ops for conversion support<ul> <li>iree_input.tensor.export 
(Input::TensorExportOp)</li> <li>iree_input.tensor.import (Input::TensorImportOp)</li> </ul> </li> <li>Tensor ops<ul> <li>iree_input.tensor.bitcast (Input::TensorBitCastOp)</li> <li>iree_input.tensor.clone (Input::TensorCloneOp)</li> <li>iree_input.tensor.load (Input::TensorLoadOp)</li> <li>iree_input.tensor.reshape (Input::TensorReshapeOp)</li> <li>iree_input.tensor.slice (Input::TensorSliceOp)</li> <li>iree_input.tensor.splat (Input::TensorSplatOp)</li> <li>iree_input.tensor.store (Input::TensorStoreOp)</li> <li>iree_input.tensor.trace (Input::TensorTraceOp)</li> <li>iree_input.tensor.update (Input::TensorUpdateOp)</li> </ul> </li> <li>Utility ops<ul> <li>iree_input.align (Input::AlignOp)</li> <li>iree_input.null (Input::NullOp)</li> </ul> </li> <li>Workgroup dispatch ops<ul> <li>iree_input.dispatch.workgroup.count (Input::DispatchWorkgroupCountOp)</li> <li>iree_input.dispatch.workgroup.id (Input::DispatchWorkgroupIDOp)</li> <li>iree_input.dispatch.workgroup.size (Input::DispatchWorkgroupSizeOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>DescriptorSetBindingAttr</li> <li>DescriptorSetLayoutAttr</li> <li>DescriptorTypeAttr</li> <li>DeviceTargetAttr</li> <li>ExecutableObjectAttr</li> <li>ExecutableObjectsAttr</li> <li>ExecutableTargetAttr</li> <li>PipelineLayoutAttr</li> </ul> </li> <li>Type constraints<ul> <li>list</li> </ul> </li> <li>Types<ul> <li>BufferType</li> <li>BufferViewType</li> <li>ByteBufferType</li> <li>ListType</li> <li>PtrType</li> <li>VariantType</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/IREEInput/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffer-and-buffer-view-ops","title":"Buffer and buffer view ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffersubspan-inputbuffersubspanop","title":"<code>iree_input.buffer.subspan</code> (Input::BufferSubspanOp)","text":"<p>Buffer subspan operation</p> <p>Syntax:</p> <pre><code>operation ::= 
`iree_input.buffer.subspan` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a reference to a subspan of the buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands","title":"Operands:","text":"Operand Description <code>source_buffer</code> Buffer is an untyped bag of bits with no shape or dtype <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/IREEInput/#results","title":"Results:","text":"Result Description <code>result</code> Buffer is an untyped bag of bits with no shape or dtype"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewcreate-inputbufferviewcreateop","title":"<code>iree_input.buffer_view.create</code> (Input::BufferViewCreateOp)","text":"<p>Buffer view reference initializer</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `,` $source_length `]`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. 
This makes it easier to associate commonly-carried metadata along with the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_1","title":"Operands:","text":"Operand Description <code>source_buffer</code> Buffer is an untyped bag of bits with no shape or dtype <code>source_offset</code> index <code>source_length</code> index <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_1","title":"Results:","text":"Result Description <code>result</code> View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewdim-inputbufferviewdimop","title":"<code>iree_input.buffer_view.dim</code> (Input::BufferViewDimOp)","text":"<p>Buffer view dimension value query</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.buffer_view.dim` $buffer_view `,` $index attr-dict `:` type($result)\n</code></pre> <p>Returns the value of the given dimension.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_2","title":"Operands:","text":"Operand Description <code>buffer_view</code> View into a buffer, with runtime shape and element 
type"},{"location":"reference/mlir-dialects/IREEInput/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewrank-inputbufferviewrankop","title":"<code>iree_input.buffer_view.rank</code> (Input::BufferViewRankOp)","text":"<p>Buffer view rank query</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.buffer_view.rank` $buffer_view attr-dict `:` type($result)\n</code></pre> <p>Returns the rank of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_3","title":"Operands:","text":"Operand Description <code>buffer_view</code> View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#results_3","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#byte-buffer-ops","title":"Byte buffer ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbyte_bufferconstant-inputbytebufferconstantop","title":"<code>iree_input.byte_buffer.constant</code> (Input::ByteBufferConstantOp)","text":"<p>Constant host-side byte buffer</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.byte_buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n</code></pre> <p>Defines a compile-time byte buffer based on the given attribute value. 
The attribute will be serialized into the canonical IREE format for the chosen host target.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::StringAttrstring attribute <code>alignment</code>::mlir::IntegerAttrindex attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_4","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/IREEInput/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputoptimization_barrier-inputoptimizationbarrierop","title":"<code>iree_input.optimization_barrier</code> (Input::OptimizationBarrierOp)","text":"<p>Prevents compiler optimizations across a value.</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.optimization_barrier` attr-dict\n              ($operands^ `:` type($operands))?\n</code></pre> <p>Wraps any operands in an unoptimizable identity to prevent its results from being folded. 
It will be dropped during the final step in compilation and has no effect at runtime.</p> <p>Traits: <code>SameOperandsAndResultType</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_4","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#results_5","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatch-inputdispatchop","title":"<code>iree_input.dispatch</code> (Input::DispatchOp)","text":"<p>A dispatch of an executable across a grid</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch` $entry_point\n              (`[` $workload^ `]`)? ``\n              `(` $arguments `)` attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_5","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of 
index"},{"location":"reference/mlir-dialects/IREEInput/#results_6","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#executable-source-ops","title":"Executable source ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutableexport-inputexecutableexportop","title":"<code>iree_input.executable.export</code> (Input::ExecutableExportOp)","text":"<p>Executable entry point declaration</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.executable.export` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              `ordinal` `(` $ordinal `)`\n              `layout` `(` $layout `)`\n              attr-dict-with-keyword\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Input::ExecutableSourceOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrsize_t <code>layout</code>::mlir::iree_compiler::IREE::Input::PipelineLayoutAttrexecutable entry point layout specification <code>workgroup_size</code>::mlir::ArrayAttrindex array attribute <code>subgroup_size</code>::mlir::IntegerAttrsize_t <code>workgroup_local_memory</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource_end-inputexecutablesourceendop","title":"<code>iree_input.executable.source_end</code> (Input::ExecutableSourceEndOp)","text":"<p>Terminator pseudo-op for the executable source op</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.executable.source_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Input::ExecutableSourceOp&gt;</code>, 
<code>Terminator</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource-inputexecutablesourceop","title":"<code>iree_input.executable.source</code> (Input::ExecutableSourceOp)","text":"<p>Generic source contents of an executable op</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.executable.source` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              ``\n              regions\n</code></pre> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Input::ExecutableSourceEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>objects</code>::mlir::iree_compiler::IREE::Input::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/IREEInput/#global-variable-ops","title":"Global variable ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobaladdress-inputglobaladdressop","title":"<code>iree_input.global.address</code> (Input::GlobalAddressOp)","text":"<p>Returns an address reference to a global</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.address` $global attr-dict `:` type($result)\n</code></pre> <p>Returns the address of a global as a typed reference. 
Can be used with the global load and store indirect ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_7","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputgloballoadindirect-inputgloballoadindirectop","title":"<code>iree_input.global.load.indirect</code> (Input::GlobalLoadIndirectOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.load.indirect` $global attr-dict `:` type($global) `-&gt;` type($result)\n</code></pre> <p>Returns a copy of the global value.</p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_6","title":"Operands:","text":"Operand Description <code>global</code> ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#results_8","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalload-inputgloballoadop","title":"<code>iree_input.global.load</code> (Input::GlobalLoadOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.load` $global attr-dict `:` type($result)\n</code></pre> <p>Returns a copy of the global value.</p> <p>Interfaces: 
<code>SymbolUserOpInterface</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_9","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobal-inputglobalop","title":"<code>iree_input.global</code> (Input::GlobalOp)","text":"<p>Stateful global variable declaration</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              (`initializer` `(` $initializer^ `)`)?\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Declares a global variable that maintains its value across invocations. The value is tied to the execution context of the module and different contexts will have different global storage.</p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initializer</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>initial_value</code>::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstoreindirect-inputglobalstoreindirectop","title":"<code>iree_input.global.store.indirect</code> (Input::GlobalStoreIndirectOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.store.indirect` $value `,` $global attr-dict `:` type($value) `-&gt;` 
type($global)\n</code></pre> <p>Stores a copy of the value into a global.</p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_7","title":"Operands:","text":"Operand Description <code>value</code> any type <code>global</code> ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstore-inputglobalstoreop","title":"<code>iree_input.global.store</code> (Input::GlobalStoreOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.store` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a copy of the value into a global.</p> <p>Interfaces: <code>SymbolUserOpInterface</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_8","title":"Operands:","text":"Operand Description <code>value</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#mutable-list-ops","title":"Mutable list ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistcreate-inputlistcreateop","title":"<code>iree_input.list.create</code> (Input::ListCreateOp)","text":"<p>Creates a new empty list</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.create` ($initial_capacity^)? 
attr-dict `:` type($result)\n</code></pre> <p>Creates a new empty list with an optional initial capacity.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_9","title":"Operands:","text":"Operand Description <code>initial_capacity</code> index"},{"location":"reference/mlir-dialects/IREEInput/#results_10","title":"Results:","text":"Result Description <code>result</code> list"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistget-inputlistgetop","title":"<code>iree_input.list.get</code> (Input::ListGetOp)","text":"<p>Element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.get` $list `[` $index `]` attr-dict `:` type($list) `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index. 
Note that the value may be null if the element is null or the type does not match.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_10","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> index"},{"location":"reference/mlir-dialects/IREEInput/#results_11","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistresize-inputlistresizeop","title":"<code>iree_input.list.resize</code> (Input::ListResizeOp)","text":"<p>Resizes the list to a new count in elements</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.resize` operands attr-dict `:` type($list)\n</code></pre> <p>Resizes the list to contain <code>new_size</code> elements. This will either truncate the list if the existing size is greater than <code>new_size</code> or extend the list with the default list value of the element type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_11","title":"Operands:","text":"Operand Description <code>list</code> list <code>new_size</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistset-inputlistsetop","title":"<code>iree_input.list.set</code> (Input::ListSetOp)","text":"<p>Element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.set` $list `[` $index `]` `,` $value attr-dict `:` type($list) `,` type($value)\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_12","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> index <code>value</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistsize-inputlistsizeop","title":"<code>iree_input.list.size</code> (Input::ListSizeOp)","text":"<p>The size of the list in elements</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.size` operands attr-dict `:` type($list)\n</code></pre> <p>Returns the current size of the list in elements.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_13","title":"Operands:","text":"Operand Description <code>list</code> list"},{"location":"reference/mlir-dialects/IREEInput/#results_12","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#pseudo-ops-for-conversion-support","title":"Pseudo ops for conversion support","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorexport-inputtensorexportop","title":"<code>iree_input.tensor.export</code> (Input::TensorExportOp)","text":"<p>Exports a tensor to a Buffer(View), capturing dynamic dims</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.export` $source `:` type($source) (`{` $source_dims^ `}`)? 
`-&gt;` type($target)\n              attr-dict-with-keyword\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_14","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_13","title":"Results:","text":"Result Description <code>target</code> Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorimport-inputtensorimportop","title":"<code>iree_input.tensor.import</code> (Input::TensorImportOp)","text":"<p>Imports a Buffer(View) to a tensor, providing dynamic dims</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.import` $source `:` type($source) `-&gt;` type($target) (`{` $target_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_15","title":"Operands:","text":"Operand Description <code>source</code> Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type <code>target_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_14","title":"Results:","text":"Result Description <code>target</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#tensor-ops","title":"Tensor 
ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorbitcast-inputtensorbitcastop","title":"<code>iree_input.tensor.bitcast</code> (Input::TensorBitCastOp)","text":"<p>Bitcasts a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.bitcast` $source `:`\n              type($source) (`{` $source_dims^ `}`)? `-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Bitcasts a tensor to a new shape without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_16","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_15","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorclone-inputtensorcloneop","title":"<code>iree_input.tensor.clone</code> (Input::TensorCloneOp)","text":"<p>Performs a full tensor clone operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.clone` $operand `:` type($result) (`{` $operand_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones the input tensor into an identical output tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_17","title":"Operands:","text":"Operand Description <code>operand</code> ranked tensor of any type values <code>operand_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_16","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorload-inputtensorloadop","title":"<code>iree_input.tensor.load</code> (Input::TensorLoadOp)","text":"<p>Loads a value from a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.load` $source (`[` $indices^ `]`)? `:`\n              type($source) (`{` $source_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_18","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_17","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorreshape-inputtensorreshapeop","title":"<code>iree_input.tensor.reshape</code> (Input::TensorReshapeOp)","text":"<p>Reshapes a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.reshape` $source `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Reshapes a tensor to a new shape without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_19","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_18","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorslice-inputtensorsliceop","title":"<code>iree_input.tensor.slice</code> (Input::TensorSliceOp)","text":"<p>Slices out a subregion of a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones a subregion of a tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_20","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_19","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorsplat-inputtensorsplatop","title":"<code>iree_input.tensor.splat</code> (Input::TensorSplatOp)","text":"<p>Splats a value into a shaped tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor initialized to the given primitive value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_21","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_20","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type 
values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorstore-inputtensorstoreop","title":"<code>iree_input.tensor.store</code> (Input::TensorStoreOp)","text":"<p>Stores a value into a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n              type($target) (`{` $target_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with the element at the given index set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_22","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type or vector of any type values <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_21","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensortrace-inputtensortraceop","title":"<code>iree_input.tensor.trace</code> (Input::TensorTraceOp)","text":"<p>Traces one or more tensor values at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.trace` $key `=` `[`\n              custom&lt;ShapedOperandList&gt;($values, type($values), $value_dims)\n              `]` attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given tensors. 
The key is arbitrary and can be used for identifying the set of values being traced.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_23","title":"Operands:","text":"Operand Description <code>values</code> variadic of ranked tensor of any type values <code>value_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorupdate-inputtensorupdateop","title":"<code>iree_input.tensor.update</code> (Input::TensorUpdateOp)","text":"<p>Updates a tensor with the contents of another tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n              type($update) (`{` $update_dims^ `}`)? `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims)\n              attr-dict-with-keyword\n</code></pre> <p>Updates the target tensor with the contents of the update tensor at the given offset indices.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_24","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>start_indices</code> variadic of index <code>update</code> ranked tensor of any type values <code>update_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_22","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any 
type values"},{"location":"reference/mlir-dialects/IREEInput/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputalign-inputalignop","title":"<code>iree_input.align</code> (Input::AlignOp)","text":"<p>Aligns up to a power-of-two alignment if required</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.align` $value `,` $alignment attr-dict `:` type($result)\n</code></pre> <p>Aligns |value| up to the given power-of-two |alignment| if required.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_25","title":"Operands:","text":"Operand Description <code>value</code> signless-integer-like <code>alignment</code> signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#results_23","title":"Results:","text":"Result Description <code>result</code> signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputnull-inputnullop","title":"<code>iree_input.null</code> (Input::NullOp)","text":"<p>A null value</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.null` attr-dict `:` type($result)\n</code></pre> <p>Initializes reference and variant types with a null value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#results_24","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#workgroup-dispatch-ops","title":"Workgroup dispatch 
ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupcount-inputdispatchworkgroupcountop","title":"<code>iree_input.dispatch.workgroup.count</code> (Input::DispatchWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid.</p> <p>Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).</p> <pre><code>%x = iree_input.dispatch.workgroup.count[0] : index\n%y = iree_input.dispatch.workgroup.count[1] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_25","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupid-inputdispatchworkgroupidop","title":"<code>iree_input.dispatch.workgroup.id</code> (Input::DispatchWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current workgroup in the range of <code>[0, iree_input.dispatch.workgroup.count)</code> along each dimension.</p> <p>Corresponds to the 
<code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).</p> <pre><code>%x = iree_input.dispatch.workgroup.id[0] : index\n%y = iree_input.dispatch.workgroup.id[1] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_26","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupsize-inputdispatchworkgroupsizeop","title":"<code>iree_input.dispatch.workgroup.size</code> (Input::DispatchWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Workgroup sizes are not determined at the iree dialect level as they are dependent on the target backend determined when lowering into the HAL. 
It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.</p> <p>Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).</p> <pre><code>%x = iree_input.dispatch.workgroup.size[0] : index\n%y = iree_input.dispatch.workgroup.size[1] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_27","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#attributes_13","title":"Attributes","text":""},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"<p>descriptor set binding specification</p> <p>Syntax:</p> <pre><code>#iree_input.descriptor_set.binding&lt;\n  int64_t,   # ordinal\n  DescriptorType,   # type\n  std::optional&lt;DescriptorFlags&gt;   # flags\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/IREEInput/#parameters","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> type <code>DescriptorType</code> flags <code>std::optional&lt;DescriptorFlags&gt;</code>"},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"<p>descriptor set layout specification</p> <p>Syntax:</p> <pre><code>#iree_input.descriptor_set.layout&lt;\n  int64_t,   # ordinal\n  
::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;,   # bindings\n  std::optional&lt;DescriptorSetLayoutFlags&gt;   # flags\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> bindings <code>::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;</code> flags <code>std::optional&lt;DescriptorSetLayoutFlags&gt;</code>"},{"location":"reference/mlir-dialects/IREEInput/#descriptortypeattr","title":"DescriptorTypeAttr","text":"<p>valid DescriptorType</p> <p>Syntax:</p> <pre><code>#iree_input.descriptor_type&lt;\n  ::mlir::iree_compiler::IREE::Input::DescriptorType   # value\n&gt;\n</code></pre> <p>Enum cases:</p> <ul> <li>uniform_buffer (<code>UniformBuffer</code>)</li> <li>storage_buffer (<code>StorageBuffer</code>)</li> </ul>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description value <code>::mlir::iree_compiler::IREE::Input::DescriptorType</code> an enum of type DescriptorType"},{"location":"reference/mlir-dialects/IREEInput/#devicetargetattr","title":"DeviceTargetAttr","text":"<p>generic device target specification</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description deviceID <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectattr","title":"ExecutableObjectAttr","text":"<p>executable object reference</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description path <code>StringAttr</code> data <code>DenseIntElementsAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"<p>target-specific object file references</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_5","title":"Parameters:","text":"Parameter C++ 
type Description targets <code>ArrayAttr</code> targetObjects <code>ArrayAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#executabletargetattr","title":"ExecutableTargetAttr","text":"<p>generic executable target specification</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description backend <code>StringAttr</code> format <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"<p>executable entry point layout specification</p> <p>Syntax:</p> <pre><code>#iree_input.pipeline.layout&lt;\n  int64_t,   # pushConstants\n  ::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;   # setLayouts\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description pushConstants <code>int64_t</code> setLayouts <code>::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;</code>"},{"location":"reference/mlir-dialects/IREEInput/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/IREEInput/#list","title":"list","text":"<p>A mutable, resizable list of some type.</p>"},{"location":"reference/mlir-dialects/IREEInput/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffertype","title":"BufferType","text":"<p>Buffer is an untyped bag of bits with no shape or dtype</p> <p>Syntax: <code>!iree_input.buffer</code></p> <p>Buffers represent an untyped bag of bits that can be reinterpreted depending on a use case using <code>buffer_view</code> operation. Buffers can be used for packing multiple tensors into the same underlying storage. 
It is left to higher level code to decide how exactly tensors are laid out in the buffer.</p>"},{"location":"reference/mlir-dialects/IREEInput/#bufferviewtype","title":"BufferViewType","text":"<p>View into a buffer, with runtime shape and element type</p> <p>Syntax: <code>!iree_input.buffer_view</code></p> <p>BufferViews represent views onto backing IREE runtime Buffer objects, adding runtime shape and element type parameters to the backing buffer. BufferViews are typically accepted and returned at boundaries with external code.</p> <p>In the runtime and lower level compiler, BufferViews are fully modeled; however, as boundary types, not all features are exposed publicly. Since within compiled tensor programs, it is typical to operate in terms of fully typed tensors, the primary mechanism for getting or using a BufferView at the high level is by casting to/from a tensor. It is left to higher level code to ensure that aliasing rules are enforced at such boundaries.</p>"},{"location":"reference/mlir-dialects/IREEInput/#bytebuffertype","title":"ByteBufferType","text":"<p>a reference counted byte buffer</p> <p>Syntax: <code>!iree_input.byte_buffer</code></p> <p>A reference counted byte buffer that models a pointer, offset, and length.</p>"},{"location":"reference/mlir-dialects/IREEInput/#listtype","title":"ListType","text":"<p>A one dimensional list of runtime values</p> <p>Represents a list of arbitrary type. Primitive types can be expected to be efficiently stored in an unboxed form. 
Reference types and variants are permitted.</p> <p>Lists can either be homogenous, with a fixed element type, or heterogenous by parameterizing them with a VariantType.</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description elementType <code>::mlir::Type</code> A type suitable as an element type of a container"},{"location":"reference/mlir-dialects/IREEInput/#ptrtype","title":"PtrType","text":"<p>Pointer to a concrete type</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description targetType <code>::mlir::Type</code> A type suitable as a target type of a pointer"},{"location":"reference/mlir-dialects/IREEInput/#varianttype","title":"VariantType","text":"<p>Represents any legal or reference type in the IREE runtime</p> <p>Syntax: <code>!iree_input.variant</code></p> <p>The variant type is typically used to parameterize container types that can contain any legal primitive, reference or null in the IREE type system.</p>"},{"location":"reference/mlir-dialects/IREEVectorExt/","title":"IREEVectorExt","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_ext-dialect","title":"'iree_vector_ext' Dialect","text":"<p>IREE Vector Extensions.</p> <p>A dialect designed for experimenting with vector operations beyond what is currently available in the Vector Dialect.</p> <ul> <li>'iree_vector_ext' Dialect<ul> <li>Operations<ul> <li>iree_vector_ext.layout_conflict_resolution (VectorExt::LayoutConflictResolutionOp)</li> <li>iree_vector_ext.to_simd (VectorExt::ToSIMDOp)</li> <li>iree_vector_ext.to_simt (VectorExt::ToSIMTOp)</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/IREEVectorExt/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extlayout_conflict_resolution-vectorextlayoutconflictresolutionop","title":"<code>iree_vector_ext.layout_conflict_resolution</code> (VectorExt::LayoutConflictResolutionOp)","text":"<p>Layout Conflict Resolution operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_vector_ext.layout_conflict_resolution` $input attr-dict `:` type($input) `-&gt;` type($output)\n</code></pre> <p>The layout conflict resolution operator takes a vector and a desired layout and transforms the vector to one with the desired layout.</p>"},{"location":"reference/mlir-dialects/IREEVectorExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sourceLayout</code>::mlir::iree_compiler::IREE::VectorExt::VectorLayoutInterfaceVectorLayoutInterface instance <code>desiredLayout</code>::mlir::iree_compiler::IREE::VectorExt::VectorLayoutInterfaceVectorLayoutInterface instance"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands","title":"Operands:","text":"Operand Description <code>input</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results","title":"Results:","text":"Result Description <code>output</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extto_simd-vectorexttosimdop","title":"<code>iree_vector_ext.to_simd</code> (VectorExt::ToSIMDOp)","text":"<p>SIMT to SIMD conversion operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_vector_ext.to_simd` $input attr-dict `:` type($input) `-&gt;` type($output)\n</code></pre> <p>This operation is a temporary operation useful for source/target materializations when doing type conversions between distributed and not distributed vectors.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultElementType</code></p> <p>Interfaces: 
<code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands_1","title":"Operands:","text":"Operand Description <code>input</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results_1","title":"Results:","text":"Result Description <code>output</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extto_simt-vectorexttosimtop","title":"<code>iree_vector_ext.to_simt</code> (VectorExt::ToSIMTOp)","text":"<p>SIMD to SIMT conversion operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_vector_ext.to_simt` $input attr-dict `:` type($input) `-&gt;` type($output)\n</code></pre> <p>This operation is a temporary operation useful for source/target materializations when doing type conversions between distributed and not distributed vectors.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultElementType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands_2","title":"Operands:","text":"Operand Description <code>input</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results_2","title":"Results:","text":"Result Description <code>output</code> vector of any type values"},{"location":"reference/mlir-dialects/LinalgExt/","title":"LinalgExt","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_ext-dialect","title":"'iree_linalg_ext' Dialect","text":"<p>IREE Linalg Extensions.</p> <p>A dialect designed for experimenting with non-structured operations that cannot be represented efficiently/directly by the Linalg dialect.</p> <ul> <li>'iree_linalg_ext' Dialect<ul> 
<li>Operations<ul> <li>Data tiling ops<ul> <li>iree_linalg_ext.pack (LinalgExt::PackOp)</li> <li>iree_linalg_ext.set_encoding (LinalgExt::SetEncodingOp)</li> <li>iree_linalg_ext.unpack (LinalgExt::UnPackOp)</li> <li>iree_linalg_ext.unset_encoding (LinalgExt::UnsetEncodingOp)</li> <li>iree_linalg_ext.upper_bound_tile_size (LinalgExt::UpperBoundTileSizeOp)</li> </ul> </li> <li>Non-structured ops<ul> <li>iree_linalg_ext.attention (LinalgExt::AttentionOp)</li> <li>iree_linalg_ext.fft (LinalgExt::FftOp)</li> <li>iree_linalg_ext.reverse (LinalgExt::ReverseOp)</li> <li>iree_linalg_ext.scan (LinalgExt::ScanOp)</li> <li>iree_linalg_ext.scatter (LinalgExt::ScatterOp)</li> <li>iree_linalg_ext.sort (LinalgExt::SortOp)</li> <li>iree_linalg_ext.topk (LinalgExt::TopkOp)</li> </ul> </li> <li>Utility ops<ul> <li>iree_linalg_ext.yield (LinalgExt::YieldOp)</li> </ul> </li> <li>Winograd ops<ul> <li>iree_linalg_ext.winograd.input_transform (LinalgExt::WinogradInputTransformOp)</li> <li>iree_linalg_ext.winograd.output_transform (LinalgExt::WinogradOutputTransformOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>EncodingAttr</li> <li>EncodingRoleAttr</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/LinalgExt/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/LinalgExt/#data-tiling-ops","title":"Data tiling ops","text":"<p>Operations for working with data layouts, padding, encodings, and other properties useful for tiling computations across iteration space dimensions.</p>"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extpack-linalgextpackop","title":"<code>iree_linalg_ext.pack</code> (LinalgExt::PackOp)","text":"<p>Pack operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.pack` attr-dict\n              $inputs\n              (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?\n              (`outer_dims_perm` `=` $outer_dims_perm^)?\n              `inner_dims_pos` `=` 
$inner_dims_pos\n              `inner_tiles` `=`\n              custom&lt;DynamicIndexList&gt;($inner_tiles, $static_inner_tiles)\n              `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n              (`-&gt;` type($results)^)?\n</code></pre> <p>The pack operation converts an <code>input</code> into a tiled and packed layout. The dimensions to be tiled are obtained from <code>inner_dims_pos</code> and the size of the tile is obtained from <code>inner_tiles</code>. The dimensions listed in <code>inner_dims_pos</code> do not need to be contiguous in which case the tile will get transposed.  We handle only full tiles if <code>padding_value</code> is not set; it is UB if the tile does not perfectly divide the dimension. If <code>padding_value</code> is set, it will pad along high dimensions, i.e., it pads at the bottom and on the right if the input has rank 2, and the result type shape, will be dynamic in any dimension if and only if the input shape is. As optional input, the operation takes <code>outer_dims_perm</code> that allows to permute the tiled loops.</p> <p>Example KC_to_KCck:</p> <pre><code>iree_linalg_ext.pack %arg0 inner_dims_pos = [1, 0]\n  inner_tiles = [32, 8] into %arg1 : (memref&lt;128x256xf32&gt; memref&lt;16x8x32x8xf32&gt;)\n</code></pre> <p>Example NC_to_NCnc:</p> <p><pre><code>iree_linalg_ext.pack %arg0 inner_dims_pos = [0, 1]\n  inner_tiles = [8, 32] into %arg1 : (memref&lt;128x256xf32&gt; memref&lt;16x8x8x32xf32&gt;)\n</code></pre> Example KC_to_CKkc</p> <pre><code>iree_linalg_ext.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n  inner_tiles = [32, 8] into %arg1 : (memref&lt;128x256xf32&gt; memref&lt;32x4x32x8xf32&gt;)\n</code></pre> <p>In all cases, dimension at position 0 in the input memref (128) is tiled with a factor of 8, while dimension at position 1 (256) is tiled with a factor of 32. 
In the KC_to_KCck example, the point loops are interchanged, while in the KC_to_CKkc example the tiled loops.</p> <p>Example NC_to_NCnc with padding:</p> <pre><code>iree_linalg_ext.pack %arg padding_value(%pad : f32) inner_dims_pos = [0, 1]\n  inner_tiles = [8, 2] into %arg1 : (memref&lt;13x15xf32&gt; memref&lt;2x8x8x2xf32&gt;)\n</code></pre> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>outer_dims_perm</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>inner_dims_pos</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_inner_tiles</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values <code>inner_tiles</code> variadic of index <code>padding_value</code> any type"},{"location":"reference/mlir-dialects/LinalgExt/#results","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extset_encoding-linalgextsetencodingop","title":"<code>iree_linalg_ext.set_encoding</code> (LinalgExt::SetEncodingOp)","text":"<p>Perform pack and pad operation on source</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.set_encoding` attr-dict $source `:` type($source) `-&gt;` type($result)\n</code></pre> <p>Operation to assign an 
encoding to a tensor. The operation does not change the rank or extent of a tensor. Instead it adds an encoding attribute to the tensor type to represent a change in layout.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_1","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_1","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extunpack-linalgextunpackop","title":"<code>iree_linalg_ext.unpack</code> (LinalgExt::UnPackOp)","text":"<p>Unpack operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.unpack` attr-dict\n              $inputs\n              (`outer_dims_perm` `=` $outer_dims_perm^)?\n              `inner_dims_pos` `=` $inner_dims_pos\n              `inner_tiles` `=`\n              custom&lt;DynamicIndexList&gt;($inner_tiles, $static_inner_tiles)\n              `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n              (`-&gt;` type($results)^)?\n</code></pre> <p>The unpack operation converts a tiled and packed input to an unpacked output. See <code>pack</code> for more details on <code>inner_tiles</code> and <code>dims_pos</code>; it is UB if the tile does not perfectly divide the dimension. 
Optionally, the operation also supports permuting the tiled loops.</p> <p>Example KCck_to_KC:</p> <pre><code>iree_linalg_ext.unpack %arg0 dims_pos = [1, 0]\n  inner_tiles = [32, 8] into %arg1 : (memref&lt;16x8x32x8xf32&gt; memref&lt;128x256xf32&gt;)\n</code></pre> <p>Example NCnc_to_NC:</p> <pre><code>iree_linalg_ext.unpack %arg0 dims_pos = [0, 1]\n  inner_tiles = [8, 32] into %arg1 : (memref&lt;16x8x8x32xf32&gt; memref&lt;128x256xf32&gt;)\n</code></pre> <p>Example CKkc_to_KC:</p> <pre><code>iree_linalg_ext.unpack %arg1 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n  inner_tiles = [32, 8] into %arg0 : (memref&lt;32x4x32x8xf32&gt; memref&lt;128x256xf32&gt;)\n</code></pre> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>outer_dims_perm</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>inner_dims_pos</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_inner_tiles</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_2","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values <code>inner_tiles</code> variadic of index"},{"location":"reference/mlir-dialects/LinalgExt/#results_2","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type 
values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extunset_encoding-linalgextunsetencodingop","title":"<code>iree_linalg_ext.unset_encoding</code> (LinalgExt::UnsetEncodingOp)","text":"<p>Perform unpack and extract operation on source</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.unset_encoding` attr-dict $source `:` type($source) `-&gt;` type($result)\n</code></pre> <p>Operation to convert a tensor with encoding that represents its data layout into a tensor with default layout (i.e. no encoding). For now in IREE the default layout is row-major.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_3","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_3","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extupper_bound_tile_size-linalgextupperboundtilesizeop","title":"<code>iree_linalg_ext.upper_bound_tile_size</code> (LinalgExt::UpperBoundTileSizeOp)","text":"<p>Returns an upper bound on tile sizes</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.upper_bound_tile_size` attr-dict $tensorType `-&gt;` type($results)\n</code></pre> <p>This returns the largest tile sizes that might result from materialization of the given encoding. This can be used outside of target-specific code, so there may be multiple targets, and this will return the maximum tile size from iterating over all of them. 
The evaluation happens in the MaterializeUpperBoundTileSize pass.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tensorType</code>::mlir::TypeAttrtype attribute of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_4","title":"Results:","text":"Result Description <code>results</code> variadic of index"},{"location":"reference/mlir-dialects/LinalgExt/#non-structured-ops","title":"Non-structured ops","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extattention-linalgextattentionop","title":"<code>iree_linalg_ext.attention</code> (LinalgExt::AttentionOp)","text":"<p>Attention operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.attention` attr-dict\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`-&gt;` type($results)^)?\n</code></pre> <p>This operator takes in 3 tensors: query(Q), key(K) and value(V) and computes the attention. For self-attention, all inputs have the same shape BxNxd where B is the size of the batch dimension, N is the sequence length and d is head dimension. Typically N &gt;&gt;&gt; d. Mathematically, the attention is defined as matmul(softmax(matmul(Q, transpose(K))), V) and has shape BxNxd. Usually, this operator also performs scaling, masking and dropout, but we leave that out of the current implementation. For cross-attention, the query and output have the same shape (BxNxd), while the key and value differ in sequence length (they have shape BxLxd, where L != N). 
This operator after tiling results in a tiled result as per flash attention and results in the current <code>max</code> and <code>sum</code> statistics while processing the current tile.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>transpose_v</code>::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_4","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_5","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extfft-linalgextfftop","title":"<code>iree_linalg_ext.fft</code> (LinalgExt::FftOp)","text":"<p>Fft operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.fft` attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`:` type($results)^)?\n</code></pre> <p>Apply 1D FFT to innermost dim. This is an iterative FFT, not recursive. Thus, the bit reversal is assumed applied on the input. The op carries an input -- stage, which indicates the level of reduction loop in the algorithm. It represents the computation body. 
For more details, see \"Data reordering, bit reversal, and in-place algorithms\" section in https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm</p> <p>The size of innermost dim is expected to be a power of 2.</p> <p>It is optional to carry coefficient tensors/buffers as inputs. In this context, they will be the second and third inputs.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_5","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of any type <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_6","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extreverse-linalgextreverseop","title":"<code>iree_linalg_ext.reverse</code> (LinalgExt::ReverseOp)","text":"<p>Reverse operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.reverse` attr-dict `dimensions` `(` $dimensions `)`\n              (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              (`outs` `(` $outputs^ `:` type($outputs) `)`)?\n              (`:` type($results)^)?\n</code></pre> <p>A temporary solution for lowering reverse ops into IREE, allowing IREE to tile and distribute them. 
</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimensions</code>::mlir::DenseIntElementsAttr64-bit signless integer elements attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_6","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_7","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extscan-linalgextscanop","title":"<code>iree_linalg_ext.scan</code> (LinalgExt::ScanOp)","text":"<p>Scan operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.scan` attr-dict\n              `dimension` `(` $dimension `)`\n              `inclusive` `(` $inclusive `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>Computes the inclusive/exclusive scan along a given dimension.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, 
<code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttr64-bit signless integer attribute <code>inclusive</code>::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_7","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_8","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extscatter-linalgextscatterop","title":"<code>iree_linalg_ext.scatter</code> (LinalgExt::ScatterOp)","text":"<p>Scatter operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.scatter` attr-dict `dimension_map` `=` $dimension_map\n              `unique_indices` `(` $unique_indices `)`\n              (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>Based on XLA operation semantics, takes two <code>inputs</code> (<code>update</code> and <code>indices</code>) and <code>outputs</code> value (<code>original</code>). The operation updates the value at the slices specified by <code>indices</code> by combining the current value with the value in <code>updates</code> using the computation specified in <code>region</code>. The <code>region</code> specifies a binary operation of signature (T, T) -&gt; T, where <code>T</code> is the element-type of <code>updates</code> (and <code>original</code>). The first argument corresponds to the value to be updated (i.e. from <code>updates</code>), and the second the current value (i.e. 
value from <code>original</code>).</p> <p>The <code>indices</code> is a 2D tensor/memref type. The first dim is the number of updates, and the second dim is index depth. The index depth should always be static.</p> <p>The first dim of <code>updates</code> and <code>indices</code> is identical, since they represent the number of updates.</p> <p>The rank of the <code>original</code>/<code>result</code> is at least <code>index_depth + rank(%updates) - 1</code>. The first <code>index_depth</code> indices are derived from <code>indices</code> and the shape of update value has the last rank(%original) - index_depth values match %(originals) last dimensions, with the previous dims extending from the index offsets.</p> <p>The dimension_map attribute describes which index value maps to which dimension in the destination. It cannot contain duplicate values, must have as many entries as index depth, and values must be within the rank of the destination.</p> <p>The unique_indices attribute carries the information whether all the indices are unique. If there are repeated indices, the first iteration loop will be marked as reduction.</p> <p>The shapes definition follows tensorflow operations except that it forces batch dims to be 1D. 
See more information in   https://www.tensorflow.org/api_docs/python/tf/tensor_scatter_nd_update</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension_map</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>unique_indices</code>::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_8","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of ranked tensor or memref of any type values <code>outputs</code> variadic of ranked tensor or memref of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_9","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extsort-linalgextsortop","title":"<code>iree_linalg_ext.sort</code> (LinalgExt::SortOp)","text":"<p>Sort operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.sort` attr-dict\n              `dimension` `(` $dimension `)`\n              (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>Based on XLA operation semantics, sorts the given <code>operands</code> at the given <code>dimension</code> with the given <code>comparator</code>.</p> <p>See https://www.tensorflow.org/xla/operation_semantics#sort.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, 
<code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_9","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of any type <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_10","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_exttopk-linalgexttopkop","title":"<code>iree_linalg_ext.topk</code> (LinalgExt::TopkOp)","text":"<p>Top-K operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.topk` attr-dict\n              `dimension` `(` $dimension `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>A Top-K operation for N-D tensors. Reduces the target dimension from the input size N down to K elements based on the supplied binary region.</p> <p>Accepts an N-D tensor input consisting of values and an optional N-D tensor for indices of those values (i32 type). If input indices aren't provided, the index mapping is inferred based on the k dim.  Both input values/indices tensors and output values/indices tensors must have the same shape. Top-K is computed along the target dimension (from dimension()). Returns two output tensors of values and the indices of Top-K results. 
The output dimensions must match the input save for the dimension that is reduced to K results.</p> <p>Region accepts lhs=[next N input] and rhs=[existing K output] and yields an i1. If true, the two values are swapped:   - For Top-K comparison: &gt;   - For Min-K comparison: &lt; Note: when the two values are equal, the first occurrence is always selected.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_10","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_11","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extyield-linalgextyieldop","title":"<code>iree_linalg_ext.yield</code> (LinalgExt::YieldOp)","text":"<p>LinalgExt yield op</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.yield` attr-dict ($operands^ `:` type($operands))?\n</code></pre> <p><code>iree_linalg_ext.yield</code> is a special terminator operation for blocks inside regions in <code>iree_linalg_ext</code> ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, 
<code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_11","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/LinalgExt/#winograd-ops","title":"Winograd ops","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extwinogradinput_transform-linalgextwinogradinputtransformop","title":"<code>iree_linalg_ext.winograd.input_transform</code> (LinalgExt::WinogradInputTransformOp)","text":"<p>Winograd Input Transform operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.winograd.input_transform` attr-dict\n              `output_tile_size` `(` $output_tile_size `)`\n              `kernel_size` `(` $kernel_size `)`\n              `image_dimensions` `(` $image_dimensions `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`-&gt;` type($result)^)?\n</code></pre> <p>This operator is the first step in converting a convolution to its Winograd equivalent. Given a tile of an input image (I), this operator computes matmul(transpose(B), matmul(I, B)). The input tile is assumed to be square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. B is a constant 2-d square matrix of the same shape as the input tile I. The input to the operator is an image of shape (N, H, W, C) or (N, C, H, W) and the output is an operator of shape (m + r - 1, m + r - 1, N, H', W', C) where H' = ceil((H - m + 1)/r) and W' = ceil((W - m + 1)/r). 
The result of this operator is first collapsed and then fed to a batch matmul op.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>output_tile_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>kernel_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>image_dimensions</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_12","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_12","title":"Results:","text":"Result Description <code>result</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extwinogradoutput_transform-linalgextwinogradoutputtransformop","title":"<code>iree_linalg_ext.winograd.output_transform</code> (LinalgExt::WinogradOutputTransformOp)","text":"<p>Winograd Output Transform operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.winograd.output_transform` attr-dict\n              `output_tile_size` `(` $output_tile_size `)`\n              `kernel_size` `(` $kernel_size `)`\n              `image_dimensions` `(` $image_dimensions `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`-&gt;` type($result)^)?\n</code></pre> <p>This operator is the last transform in 
converting a convolution to its Winograd equivalent. After convolution in the Winograd domain (which turns into an elementwise product for a single channel and batch matrix multiplication for many channels), this operator converts the output back into the original domain. Given a tile of the output (O) in the Winograd domain, this operator computes matmul(transpose(A), matmul(O, A)). The output tile is square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. A is a constant 2-d matrix of shape (m + r - 1) x r. The input to the operator is a tensor of shape (m + r - 1, m + r - 1, N, H', W', C) and the output is a tensor of shape (N, H, W, C) or (N, C, H, W) where H = r H' and W = r W'. This operator is followed by a tensor.extract_slice which extracts only the non-padded part of the output.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>output_tile_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>kernel_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>image_dimensions</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_13","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_13","title":"Results:","text":"Result Description <code>result</code> variadic of ranked tensor of 
any type values"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_11","title":"Attributes","text":""},{"location":"reference/mlir-dialects/LinalgExt/#encodingattr","title":"EncodingAttr","text":"<p>information to decide how to data-tile a tensor</p> <p>Syntax:</p> <pre><code>#iree_linalg_ext.encoding&lt;\n  EncodingRoleAttr,   # role\n  ArrayAttr,   # element_types\n  TypeAttr,   # original_type\n  IntegerAttr,   # matmul_narrow_M\n  IntegerAttr,   # matmul_narrow_N\n  ArrayAttr   # user_indexing_maps\n&gt;\n</code></pre> <p>This attribute describes the change in the layout for a given tensor to execute subsequent operations on the tiled layout. The encoding serves as a way to represent the change in the way the data is laid out in memory without changing the logical rank/extent of the tensor itself. When required, the encoding can be used to explicitly manifest the layout change through operations like pack/unpack.</p>"},{"location":"reference/mlir-dialects/LinalgExt/#parameters","title":"Parameters:","text":"Parameter C++ type Description role <code>EncodingRoleAttr</code> role of this tensor as an operand element_types <code>ArrayAttr</code> element types of the user's operands original_type <code>TypeAttr</code> type of the original tensor type before padding matmul_narrow_M <code>IntegerAttr</code> optional M narrow dimension size (only for contraction op user_indexing_maps) matmul_narrow_N <code>IntegerAttr</code> optional N narrow dimension size (only for contraction op user_indexing_maps) user_indexing_maps <code>ArrayAttr</code> Indexing maps of the operation using this tensor"},{"location":"reference/mlir-dialects/LinalgExt/#encodingroleattr","title":"EncodingRoleAttr","text":"<p>Describes the role of the tensor as an operand or a result of an operation.</p> <p>Syntax:</p> <pre><code>#iree_linalg_ext.role&lt;\n  ::mlir::iree_compiler::IREE::LinalgExt::EncodingRole   # value\n&gt;\n</code></pre> <p>Enum cases: * LHS (<code>LHS</code>) * RHS 
(<code>RHS</code>) * RESULT (<code>RESULT</code>)</p>"},{"location":"reference/mlir-dialects/LinalgExt/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description value <code>::mlir::iree_compiler::IREE::LinalgExt::EncodingRole</code> an enum of type EncodingRole"},{"location":"reference/mlir-dialects/Stream/","title":"Stream","text":""},{"location":"reference/mlir-dialects/Stream/#stream-dialect","title":"'stream' Dialect","text":"<p>A dialect designed to model execution partitioning and scheduling.</p> <p>The stream dialect is designed to take tensor programs and convert them to explicitly scheduled asynchronous programs. This includes placing ops on specific targets, partitioning the work between the targets, scheduling the work for concurrency, and encoding tensors into target-specific resources.</p> <pre><code>+--------+    +----------+    +-------+\n| flow.* | -&gt; | stream.* | -&gt; | hal.* |\n+--------+    +----------+    +-------+\n</code></pre> <p>This sits in-between the <code>flow</code> and <code>hal</code> dialects.</p> <ul> <li> <p><code>flow</code> models tensor programs by separating work into dispatchable   functions in order to isolate the main host program data flow and the   dense tensor compute operations.</p> </li> <li> <p><code>stream</code> models explicitly scheduled asynchronous programs by partitioning   the dispatchable work, specifying target affinities, encoding tensors into   target-specific forms, and scheduling the work to run concurrently.</p> </li> <li> <p><code>hal</code> models a low-level hardware abstraction layer used to manage   buffers and issue asynchronous work across a variety of device types. The   dialect is largely 1:1 with the IREE HAL C API.</p> </li> </ul> <p>Transforms in the dialect lower tensor values into opaque resources with the goal of ensuring no tensors survive in the IR. 
At entry <code>stream.tensor.*</code> ops are used to capture the source tensor encoding information (data type, shapes, etc) and then lowered into <code>stream.async.*</code> ops that model the asynchronous workloads on the opaque resources. The asynchronous operations are then partitioned, allocated, and scheduled for execution using the <code>stream.cmd.*</code> ops.</p> <p>It's intended that after transformation through the stream dialect the program is ready for execution on an abstract machine. At this level of representation buffers have still not been allocated and devices are not yet resolved, however the information captured in the <code>stream</code> IR allows such operations to be done trivially. To this end all ops carry the symbolic size of the resources on which they operate as well as the lifetime of the resources they are acting upon. This manifests in the usage of the <code>!stream.resource</code> type:</p> <pre><code>// Unresolved lifetime (resolved during the iree-stream-refine-usage pass):\n!stream.resource&lt;*&gt;\n// An externally managed value (passed in via the program API).\n!stream.resource&lt;external&gt;\n// A staging buffer for uploads/downloads.\n!stream.resource&lt;staging&gt;\n// A short-lived value that is used across streams.\n!stream.resource&lt;transient&gt;\n// A long-lived value that persists across streams in globals.\n!stream.resource&lt;variable&gt;\n// An immutable value that persists for the duration of the program.\n!stream.resource&lt;constant&gt;\n</code></pre> <p>Operations using resources carry the size of all operand result resources:</p> <pre><code>// %update (40 bytes) is being inserted into %target (296 bytes).\n// Can be dynamic values such as those originating from dynamic dimensions.\n%13 = stream.async.update %update, %target[%c256 to %c296] :\n    !stream.resource&lt;transient&gt;{%c40} -&gt;\n    %target as !stream.resource&lt;transient&gt;{%c296}\n</code></pre> <p>Once all <code>stream.async.*</code> work 
is moved into executable regions (such as <code>stream.async.execute</code>) <code>!stream.timepoint</code> values are used to sequence the execution. These timepoints represent some point in time where all execution up to that timepoint has completed and any results that were produced by the execution are available for use. Attempting to use the resources before their corresponding timepoint has been reached will lead to undefined behavior. The benefit of this is that after timepoints are established in the IR it's possible to induce aliasing of resources without breaking execution correctness.</p> <ul> <li>'stream' Dialect<ul> <li>Operations<ul> <li>Async control flow ops<ul> <li>stream.async.call (Stream::AsyncCallOp)</li> <li>stream.async.concurrent (Stream::AsyncConcurrentOp)</li> <li>stream.async.execute (Stream::AsyncExecuteOp)</li> <li>stream.async.func (Stream::AsyncFuncOp)</li> </ul> </li> <li>Channel ops<ul> <li>stream.channel.count (Stream::ChannelCountOp)</li> <li>stream.channel.create (Stream::ChannelCreateOp)</li> <li>stream.channel.rank (Stream::ChannelRankOp)</li> <li>stream.channel.split (Stream::ChannelSplitOp)</li> </ul> </li> <li>Executable ops<ul> <li>stream.binding.subspan (Stream::BindingSubspanOp)</li> <li>stream.dispatch.workgroup.count (Stream::DispatchWorkgroupCountOp)</li> <li>stream.dispatch.workgroup.id (Stream::DispatchWorkgroupIDOp)</li> <li>stream.dispatch.workgroup.size (Stream::DispatchWorkgroupSizeOp)</li> <li>stream.executable.end (Stream::ExecutableEndOp)</li> <li>stream.executable.export (Stream::ExecutableExportOp)</li> <li>stream.executable (Stream::ExecutableOp)</li> </ul> </li> <li>Execution context ops<ul> <li>stream.context.resolve (Stream::ContextResolveOp)</li> </ul> </li> <li>Explicit command ops<ul> <li>stream.cmd.call (Stream::CmdCallOp)</li> <li>stream.cmd.collective (Stream::CmdCollectiveOp)</li> <li>stream.cmd.concurrent (Stream::CmdConcurrentOp)</li> <li>stream.cmd.copy (Stream::CmdCopyOp)</li> 
<li>stream.cmd.discard (Stream::CmdDiscardOp)</li> <li>stream.cmd.dispatch (Stream::CmdDispatchOp)</li> <li>stream.cmd.execute (Stream::CmdExecuteOp)</li> <li>stream.cmd.fill (Stream::CmdFillOp)</li> <li>stream.cmd.flush (Stream::CmdFlushOp)</li> <li>stream.cmd.func (Stream::CmdFuncOp)</li> <li>stream.cmd.invalidate (Stream::CmdInvalidateOp)</li> <li>stream.cmd.serial (Stream::CmdSerialOp)</li> </ul> </li> <li>File ops<ul> <li>stream.file.constant (Stream::FileConstantOp)</li> <li>stream.file.read (Stream::FileReadOp)</li> <li>stream.file.write (Stream::FileWriteOp)</li> </ul> </li> <li>Miscellaneous ops<ul> <li>stream.return (Stream::ReturnOp)</li> <li>stream.yield (Stream::YieldOp)</li> </ul> </li> <li>Pseudo Ops<ul> <li>stream.tensor.export (Stream::TensorExportOp)</li> <li>stream.tensor.import (Stream::TensorImportOp)</li> </ul> </li> <li>Resource ops<ul> <li>stream.resource.alloc (Stream::ResourceAllocOp)</li> <li>stream.resource.alloca (Stream::ResourceAllocaOp)</li> <li>stream.resource.constants (Stream::ResourceConstantsOp)</li> <li>stream.resource.dealloca (Stream::ResourceDeallocaOp)</li> <li>stream.resource.load (Stream::ResourceLoadOp)</li> <li>stream.resource.pack (Stream::ResourcePackOp)</li> <li>stream.resource.size (Stream::ResourceSizeOp)</li> <li>stream.resource.store (Stream::ResourceStoreOp)</li> <li>stream.resource.subview (Stream::ResourceSubviewOp)</li> <li>stream.resource.try_map (Stream::ResourceTryMapOp)</li> </ul> </li> <li>Resource parameter I/O ops<ul> <li>stream.parameter.gather (Stream::ParameterGatherOp)</li> <li>stream.parameter.load (Stream::ParameterLoadOp)</li> <li>stream.parameter.read (Stream::ParameterReadOp)</li> <li>stream.parameter.scatter (Stream::ParameterScatterOp)</li> <li>stream.parameter.write (Stream::ParameterWriteOp)</li> </ul> </li> <li>Resource transfer ops<ul> <li>stream.async.alloca (Stream::AsyncAllocaOp)</li> <li>stream.async.clone (Stream::AsyncCloneOp)</li> <li>stream.async.collective 
(Stream::AsyncCollectiveOp)</li> <li>stream.async.constant (Stream::AsyncConstantOp)</li> <li>stream.async.copy (Stream::AsyncCopyOp)</li> <li>stream.async.dispatch (Stream::AsyncDispatchOp)</li> <li>stream.async.fill (Stream::AsyncFillOp)</li> <li>stream.async.load (Stream::AsyncLoadOp)</li> <li>stream.async.slice (Stream::AsyncSliceOp)</li> <li>stream.async.splat (Stream::AsyncSplatOp)</li> <li>stream.async.store (Stream::AsyncStoreOp)</li> <li>stream.async.transfer (Stream::AsyncTransferOp)</li> <li>stream.async.update (Stream::AsyncUpdateOp)</li> </ul> </li> <li>Synchronization ops<ul> <li>stream.timepoint.await (Stream::TimepointAwaitOp)</li> <li>stream.timepoint.barrier (Stream::TimepointBarrierOp)</li> <li>stream.timepoint.chain_external (Stream::TimepointChainExternalOp)</li> <li>stream.timepoint.export (Stream::TimepointExportOp)</li> <li>stream.timepoint.immediate (Stream::TimepointImmediateOp)</li> <li>stream.timepoint.import (Stream::TimepointImportOp)</li> <li>stream.timepoint.join (Stream::TimepointJoinOp)</li> </ul> </li> <li>Tensor ops<ul> <li>stream.tensor.clone (Stream::TensorCloneOp)</li> <li>stream.tensor.constant (Stream::TensorConstantOp)</li> <li>stream.tensor.empty (Stream::TensorEmptyOp)</li> <li>stream.tensor.fill (Stream::TensorFillOp)</li> <li>stream.tensor.load (Stream::TensorLoadOp)</li> <li>stream.tensor.sizeof (Stream::TensorSizeOfOp)</li> <li>stream.tensor.slice (Stream::TensorSliceOp)</li> <li>stream.tensor.splat (Stream::TensorSplatOp)</li> <li>stream.tensor.store (Stream::TensorStoreOp)</li> <li>stream.tensor.trace (Stream::TensorTraceOp)</li> <li>stream.tensor.update (Stream::TensorUpdateOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>CollectiveAttr</li> <li>NamedParameterAttr</li> <li>PartitioningConfigAttr</li> <li>ResourceConfigAttr</li> <li>TimepointAttr</li> </ul> </li> <li>Type constraints<ul> <li>constant resource</li> <li>external resource</li> <li>staging resource</li> <li>transient resource</li> 
<li>resource</li> <li>variable resource</li> </ul> </li> <li>Types<ul> <li>BindingType</li> <li>ChannelType</li> <li>FileType</li> <li>ResourceType</li> <li>TimepointType</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/Stream/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Stream/#async-control-flow-ops","title":"Async control flow ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasynccall-streamasynccallop","title":"<code>stream.async.call</code> (Stream::AsyncCallOp)","text":"<p>Calls a streamable external host function</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.call` (`on` `(` $affinity^ `)`)?\n              $callee ``\n              custom&lt;DispatchOperands&gt;($resource_operands,\n              $resource_operand_offsets,\n              $resource_operand_ends,\n              $resource_operand_lengths) attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($resource_operands),\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands)\n</code></pre> <p>Calls a function taking/returning resource values with stream semantics. Asynchronous calls must have no side-effects.</p> <p>Note that returned resources must have their sizes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the size (avoid at all costs!) a separate func.call can be used outside of the stream to do so. 
If sizes are unknowable until the operation is performed it should be made as a normal asynchronous host call with 'coarse-fences' instead.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>CallOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type or any type <code>resource_operand_sizes</code> variadic of index <code>resource_operand_offsets</code> variadic of index <code>resource_operand_ends</code> variadic of index <code>resource_operand_lengths</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamasyncconcurrent-streamasyncconcurrentop","title":"<code>stream.async.concurrent</code> (Stream::AsyncConcurrentOp)","text":"<p>Executes all ops concurrently</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.concurrent` (`on` `(` 
$affinity^ `)`)?\n              `with` ``\n              custom&lt;ResourceRegion&gt;($resource_operands,\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands, $body)\n              attr-dict-with-keyword\n</code></pre> <p>Represents a wave of work scheduled concurrently (each op executing at the same time). All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution.</p> <p>Waves can be nested to create a DAG. For example, take the following graph: <pre><code>                  |\n        v---------+---------v\n+-------|-------+   +-------|-------+\n|    v--+--v    |   |    v--+--v    |\n| +----+ +----+ |   | +----+ +----+ |\n| | %a | | %b | |   | | %c | | %d | |\n| +----+ +----+ |   | +----+ +----+ |\n|    +--v--+    |   |    +--v--+    |\n+-------|-------+   +-------|-------+\n        +---------v---------+\n                  |\n</code></pre></p> <p>Represented with nested waves: <pre><code>  %0 = stream.async.concurrent with(%arg) -&gt; ... {\n    %1 = stream.async.concurrent with(%arg as %arg0) -&gt; ... {\n      %a = ...\n      %b = ...\n      stream.yield %a, %b\n    }\n    %2 = stream.async.concurrent with(%arg as %arg1) -&gt; ... 
{\n      %c = ...\n      %d = ...\n      stream.yield %c, %d\n    }\n    stream.yield %1, %2\n  }\n</code></pre></p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>HasParent&lt;IREE::Stream::AsyncExecuteOp, IREE::Stream::AsyncConcurrentOp&gt;</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ClosureOpInterface</code>, <code>RegionBranchOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_1","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_1","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncexecute-streamasyncexecuteop","title":"<code>stream.async.execute</code> (Stream::AsyncExecuteOp)","text":"<p>Executes a dependency-aware sequence of streamable ops</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.execute` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              
`with` ``\n              custom&lt;ResourceRegion&gt;($resource_operands,\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands, $body)\n              `=` `` `&gt;` type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested <code>stream.async.concurrent</code> ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ClosureOpInterface</code>, <code>RegionBranchOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_2","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index 
<code>result_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_2","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamasyncfunc-streamasyncfuncop","title":"<code>stream.async.func</code> (Stream::AsyncFuncOp)","text":"<p>Streamable function declaration</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.func` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              ``\n              custom&lt;ShapedFunctionSignature&gt;($function_type,\n              $tied_operands,\n              $arg_attrs,\n              $res_attrs)\n              attr-dict-with-keyword\n              ($body^)?\n</code></pre> <p>Declares a function that can be called as an asynchronous streaming operation via <code>stream.async.call</code>. 
Today only external functions are allowed.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#channel-ops","title":"Channel ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamchannelcount-streamchannelcountop","title":"<code>stream.channel.count</code> (Stream::ChannelCountOp)","text":"<p>Returns the total number of participants in the group</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.count` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the total participant count in the collective communicator group.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_3","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_3","title":"Results:","text":"Result Description <code>result</code> 
index"},{"location":"reference/mlir-dialects/Stream/#streamchannelcreate-streamchannelcreateop","title":"<code>stream.channel.create</code> (Stream::ChannelCreateOp)","text":"<p>Creates a new channel for collective communication</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.create` (`on` `(` $affinity^ `)`)?\n              (`id` `(` $id^ `)`)?\n              (`group` `(` $group^ `)`)?\n              (`rank` `(` $rank^ `)`)?\n              (`count` `(` $count^ `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new channel with the given rank associated with the specified affinity. Collective operations using this channel must only be submitted on compatible affinities.</p> <p>The group and ID are optional and may be null. The rank and count can be omitted to indicate a default inherited from the environment or device configuration at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>group</code>::mlir::StringAttrstring attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_4","title":"Operands:","text":"Operand Description <code>id</code> a reference counted byte buffer <code>rank</code> index <code>count</code> index"},{"location":"reference/mlir-dialects/Stream/#results_4","title":"Results:","text":"Result Description <code>result</code> a collective communication 
channel"},{"location":"reference/mlir-dialects/Stream/#streamchannelrank-streamchannelrankop","title":"<code>stream.channel.rank</code> (Stream::ChannelRankOp)","text":"<p>Returns the rank of the local participant in the group</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.rank` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank the channel represents as a participant in a collective group in <code>[0, count)</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_5","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_5","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamchannelsplit-streamchannelsplitop","title":"<code>stream.channel.split</code> (Stream::ChannelSplitOp)","text":"<p>Splits a collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.split` $channel `,` $color `,` $key\n              `:` type($channel) `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. 
A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_6","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel <code>color</code> index <code>key</code> index"},{"location":"reference/mlir-dialects/Stream/#results_6","title":"Results:","text":"Result Description <code>result</code> a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#executable-ops","title":"Executable ops","text":""},{"location":"reference/mlir-dialects/Stream/#streambindingsubspan-streambindingsubspanop","title":"<code>stream.binding.subspan</code> (Stream::BindingSubspanOp)","text":"<p>Returns an alias to a subspan of interface binding data</p> <p>Syntax:</p> <pre><code>operation ::= `stream.binding.subspan` $binding `` `[` $byte_offset `]`\n              attr-dict `:` type($binding) `-&gt;` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Returns a subview to a tensor or memref-like type from a binding. 
The same binding may have multiple subviews at different byte offsets.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_7","title":"Operands:","text":"Operand Description <code>binding</code> a managed resource binding into an executable scope <code>byte_offset</code> index <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_7","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Stream/#streamdispatchworkgroupcount-streamdispatchworkgroupcountop","title":"<code>stream.dispatch.workgroup.count</code> (Stream::DispatchWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `stream.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable.</p> <pre><code>%x = stream.dispatch.workgroup.count[0] : index\n%y = stream.dispatch.workgroup.count[1] : index\n%z = stream.dispatch.workgroup.count[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Stream/#results_8","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamdispatchworkgroupid-streamdispatchworkgroupidop","title":"<code>stream.dispatch.workgroup.id</code> (Stream::DispatchWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `stream.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current workgroup in the range of <code>[0, stream.dispatch.workgroup.count)</code> along each dimension.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable.</p> <pre><code>%x = stream.dispatch.workgroup.id[0] : index\n%y = stream.dispatch.workgroup.id[1] : index\n%z = stream.dispatch.workgroup.id[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Stream/#results_9","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamdispatchworkgroupsize-streamdispatchworkgroupsizeop","title":"<code>stream.dispatch.workgroup.size</code> (Stream::DispatchWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `stream.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Workgroup sizes are not determined at the stream dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable.</p> <pre><code>%x = stream.dispatch.workgroup.size[0] : index\n%y = stream.dispatch.workgroup.size[1] : index\n%z = stream.dispatch.workgroup.size[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Stream/#results_10","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamexecutableend-streamexecutableendop","title":"<code>stream.executable.end</code> (Stream::ExecutableEndOp)","text":"<p>Terminator pseudo-op for the executable op</p> <p>Syntax:</p> <pre><code>operation ::= `stream.executable.end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Stream::ExecutableOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/Stream/#streamexecutableexport-streamexecutableexportop","title":"<code>stream.executable.export</code> (Stream::ExecutableExportOp)","text":"<p>Defines an executable entry point for dispatch operations</p> <p>Syntax:</p> <pre><code>operation ::= `stream.executable.export` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              custom&lt;SymbolAlias&gt;($sym_name, $function_ref)\n              custom&lt;WorkgroupCountRegion&gt;($workgroup_count)\n              attr-dict-with-keyword\n</code></pre> <p>Specifies an exported function with an externally-visible alias. 
Multiple exports can reference the same internal function.</p> <p>Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.</p> <p>Traits: <code>HasParent&lt;IREE::Stream::ExecutableOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_ref</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Stream/#streamexecutable-streamexecutableop","title":"<code>stream.executable</code> (Stream::ExecutableOp)","text":"<p>Generic executable module</p> <p>Syntax:</p> <pre><code>operation ::= `stream.executable` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>An executable module containing one or more public functions. 
The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::ExecutableEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code>, <code>Util_ObjectLike</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Stream/#execution-context-ops","title":"Execution context ops","text":"<p>Operations for interacting with the execution context that stream operations execute within.</p>"},{"location":"reference/mlir-dialects/Stream/#streamcontextresolve-streamcontextresolveop","title":"<code>stream.context.resolve</code> (Stream::ContextResolveOp)","text":"<p>Resolves low-level context resources based on type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.context.resolve` (`on` `(` $affinity^ `)`)?\n              attr-dict `:` type($results)\n</code></pre> <p>WIP; allows for accessing the implementation details of lower-level dialects such as the HAL. 
This will likely be reworked in the future to either live inside other dialects, use some op interface instead of having a dedicated op here, or remove the op entirely and make resolution happen explicitly.</p> <p>Examples:</p> <pre><code>// Returns a HAL device.\n= stream.context.resolve on(#something) : !hal.device\n// Returns a HAL device and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.device, i64\n// Returns a HAL allocator and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.allocator, i64\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#results_11","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#explicit-command-ops","title":"Explicit command ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamcmdcall-streamcmdcallop","title":"<code>stream.cmd.call</code> (Stream::CmdCallOp)","text":"<p>Calls a streamable external host function</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.call` $callee ``\n              custom&lt;CmdCallOperands&gt;($resource_operands,\n              $resource_operand_offsets,\n              $resource_operand_lengths,\n              $resource_operand_accesses) attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($resource_operands),\n              type($resource_operands),\n              $resource_operand_sizes,\n              type($results),\n              $result_sizes,\n      
        $tied_operands)\n</code></pre> <p>Calls a function operating on resource values with stream semantics. Asynchronous calls must have no side-effects.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>CallOpInterface</code>, <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>SymbolUserOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>resource_operand_accesses</code>::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_8","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of index or integer or floating-point or complex-type or resource or external resource or transient resource or variable resource or constant resource or any type <code>resource_operand_sizes</code> variadic of index <code>resource_operand_offsets</code> variadic of index <code>resource_operand_lengths</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_12","title":"Results:","text":"Result Description <code>results</code> variadic of index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamcmdcollective-streamcmdcollectiveop","title":"<code>stream.cmd.collective</code> (Stream::CmdCollectiveOp)","text":"<p>Dispatches a collective operation</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.collective` `` $op `` `[` $element_count `]`\n              `channel` `(` $channel `)`\n              (`param` `(` $param^ `:` type($param) `)`)? 
`{`\n              custom&lt;DispatchResources&gt;($resources, type($resources), $resource_sizes,\n              $resource_offsets, $resource_lengths,\n              $resource_accesses)\n              `\\n` `}`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches a collective operation specified against the device. If grouped with other collectives in a <code>stream.cmd.concurrent</code> region the collective operations may fuse and execute more efficiently.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>op</code>::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification <code>resource_accesses</code>::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_9","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel <code>element_count</code> index <code>param</code> 32-bit signless integer <code>resources</code> variadic of resource or external resource or transient resource or variable resource or constant resource <code>resource_sizes</code> variadic of index <code>resource_offsets</code> variadic of index <code>resource_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdconcurrent-streamcmdconcurrentop","title":"<code>stream.cmd.concurrent</code> (Stream::CmdConcurrentOp)","text":"<p>Executes all ops concurrently</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.concurrent` $body\n              attr-dict-with-keyword\n</code></pre> <p>Represents a wave of work scheduled concurrently (each op executing at the same time).</p> <p>Waves can be nested to create a DAG. 
For example, take the following graph:</p> <pre><code>                  |\n        v---------+---------v\n+-------|-------+   +-------|-------+\n|    v--+--v    |   |    v--+--v    |\n| +----+ +----+ |   | +----+ +----+ |\n| | @a | | @b | |   | | @c | | @d | |\n| +----+ +----+ |   | +----+ +----+ |\n|    +--v--+    |   |    +--v--+    |\n+-------|-------+   +-------|-------+\n        +---------v---------+\n                  |\n</code></pre> <p>Represented with nested waves:</p> <pre><code>  stream.cmd.concurrent {\n    stream.cmd.concurrent {\n      stream.cmd.dispatch @a\n      stream.cmd.dispatch @b\n    }\n    stream.cmd.concurrent {\n      stream.cmd.dispatch @c\n      stream.cmd.dispatch @d\n    }\n  }\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Stream::CmdExecuteOp, IREE::Stream::CmdSerialOp, IREE::Stream::CmdConcurrentOp&gt;</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>RegionBranchOpInterface</code>, <code>Stream_StreamableOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#streamcmdcopy-streamcmdcopyop","title":"<code>stream.cmd.copy</code> (Stream::CmdCopyOp)","text":"<p>Copies a subview of a stream resource to another</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.copy` $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Copies a subview of a resource into a subview of another. 
As with memcpy this does not support overlapping updates into the same resource.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_10","title":"Operands:","text":"Operand Description <code>source</code> any stream-compatible type <code>source_size</code> index <code>source_offset</code> index <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmddiscard-streamcmddiscardop","title":"<code>stream.cmd.discard</code> (Stream::CmdDiscardOp)","text":"<p>Discards a subview of a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.discard` $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Discards a subview of a resource, indicating that after this command the specified contents are no longer needed. This can be used to trim memory or invalidate caches.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_11","title":"Operands:","text":"Operand Description <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmddispatch-streamcmddispatchop","title":"<code>stream.cmd.dispatch</code> (Stream::CmdDispatchOp)","text":"<p>Dispatches a parallelized grid of work</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.dispatch` custom&lt;DispatchEntryPoints&gt;($entry_points)\n              (`[` $workload^ `]`)? 
``\n              (`(` $uniform_operands^ `:` type($uniform_operands) `)`)? `{`\n              custom&lt;DispatchResources&gt;($resources, type($resources), $resource_sizes,\n              $resource_offsets, $resource_lengths,\n              $resource_accesses)\n              `\\n` `}`\n              attr-dict-with-keyword\n</code></pre> <p>Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>SymbolUserOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_points</code>::mlir::ArrayAttrsymbol ref array attribute <code>resource_accesses</code>::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_12","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>uniform_operands</code> variadic of index or integer or floating-point or complex-type <code>resources</code> variadic of resource or external resource or transient resource or variable resource or constant resource <code>resource_sizes</code> variadic of index <code>resource_offsets</code> variadic of index <code>resource_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdexecute-streamcmdexecuteop","title":"<code>stream.cmd.execute</code> (Stream::CmdExecuteOp)","text":"<p>Executes a dependency-aware sequence of streamable ops</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.execute` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `with` ``\n              
custom&lt;ExplicitResourceRegion&gt;($resource_operands,\n              type($resource_operands), $resource_operand_sizes,\n              $body)\n              `=` `` `&gt;` type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested <code>stream.cmd.concurrent</code> ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>ClosureOpInterface</code>, <code>InferTypeOpInterface</code>, <code>RegionBranchOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_13","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_13","title":"Results:","text":"Result Description 
<code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamcmdfill-streamcmdfillop","title":"<code>stream.cmd.fill</code> (Stream::CmdFillOp)","text":"<p>Fills a subview of a stream resource with a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.fill` $value `,`\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($value) `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Splats a value into a subview of the given stream resource, updating the resource in place; the op produces no results.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_14","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index <code>value</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#streamcmdflush-streamcmdflushop","title":"<code>stream.cmd.flush</code> (Stream::CmdFlushOp)","text":"<p>Flushes a subview of a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.flush` (`to` `(` $source_affinity^ `)`)?\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Transfers a resource to an external target. 
The resource memory is made available to the target and can be made visible there using <code>stream.cmd.invalidate</code>.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_15","title":"Operands:","text":"Operand Description <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmdfunc-streamcmdfuncop","title":"<code>stream.cmd.func</code> (Stream::CmdFuncOp)","text":"<p>Streamable function declaration</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.func` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name ``\n              custom&lt;DispatchFunctionSignature&gt;($function_type,\n              $arg_attrs,\n              $res_attrs)\n              attr-dict-with-keyword\n              ($body^)?\n</code></pre> <p>Declares a function that can be called as an asynchronous streaming operation via <code>stream.cmd.call</code>. 
Today only external functions are allowed.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#streamcmdinvalidate-streamcmdinvalidateop","title":"<code>stream.cmd.invalidate</code> (Stream::CmdInvalidateOp)","text":"<p>Invalidates a subview of a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.invalidate` (`from` `(` $source_affinity^ `)`)?\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Transfers a resource from an external source into the current target. 
The resource memory is assumed to have been made available at the source via <code>stream.cmd.flush</code>.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_16","title":"Operands:","text":"Operand Description <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmdserial-streamcmdserialop","title":"<code>stream.cmd.serial</code> (Stream::CmdSerialOp)","text":"<p>Executes all ops serially (in-order)</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.serial` $body\n              attr-dict-with-keyword\n</code></pre> <p>Represents a sequence of work scheduled serially (each op executing one after the other).</p> <p>Regions can be nested to create a DAG. 
For example, take the following graph: <pre><code>                  |\n        v---------+-----v\n+-------|-------+   +---|----+\n|    v--+--v    |   |   v    |\n| +----+ +----+ |   | +----+ |\n| | @a | | @b | |   | | @c | |\n| +----+ +----+ |   | +----+ |\n|    |     |    |   |   |    |\n|    |     |    |   | +-v--+ |\n|    |     |    |   | | @d | |\n|    |     |    |   | +----+ |\n|    +--v--+    |   |   |    |\n+-------|-------+   +---|----+\n        +---------v-----+\n                  |\n</code></pre></p> <p>Represented with nested regions: <pre><code>  stream.cmd.concurrent {\n    stream.cmd.concurrent {\n      stream.cmd.dispatch @a\n      stream.cmd.dispatch @b\n    }\n    stream.cmd.serial {\n      stream.cmd.dispatch @c\n      stream.cmd.dispatch @d\n    }\n  }\n</code></pre></p> <p>Traits: <code>HasParent&lt;IREE::Stream::CmdExecuteOp, IREE::Stream::CmdSerialOp, IREE::Stream::CmdConcurrentOp&gt;</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>RegionBranchOpInterface</code>, <code>Stream_StreamableOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#file-ops","title":"File ops","text":"<p>File ops.</p>"},{"location":"reference/mlir-dialects/Stream/#streamfileconstant-streamfileconstantop","title":"<code>stream.file.constant</code> (Stream::FileConstantOp)","text":"<p>Creates a file backed by the provided constant host memory</p> <p>Syntax:</p> <pre><code>operation ::= `stream.file.constant` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `for` $source_length `]` `:`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Synchronously wraps a host heap buffer into a stream-accessible file handle. 
Changing the source buffer after definition has undefined behavior.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_17","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>source_length</code> index"},{"location":"reference/mlir-dialects/Stream/#results_14","title":"Results:","text":"Result Description <code>result</code> a file handle used for I/O operations"},{"location":"reference/mlir-dialects/Stream/#streamfileread-streamfilereadop","title":"<code>stream.file.read</code> (Stream::FileReadOp)","text":"<p>Reads a segment of a file into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.file.read` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`):(`:`)?\n              $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `-&gt;`\n              type($target) `` `{` $target_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads a 
segment of a file into a resource.</p> <p>Some implementations can stream directly from the source file into device-local memory and file ops should be preferred to manually staging memory through host buffers.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_18","title":"Operands:","text":"Operand Description <code>source</code> a file handle used for I/O operations <code>source_offset</code> 64-bit signless integer <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>length</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_15","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamfilewrite-streamfilewriteop","title":"<code>stream.file.write</code> (Stream::FileWriteOp)","text":"<p>Writes a segment of a file from a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.file.write` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`):(`:`)?\n              $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($target)\n              `=` `` `&gt;`\n              type($result_timepoint)\n              
attr-dict-with-keyword\n</code></pre> <p>Asynchronously writes a segment of a resource into a file. The file range must be valid within the file as this operation cannot grow the underlying file storage.</p> <p>Some implementations can stream directly from device-local memory into the target file and file ops should be preferred to manually staging memory through host buffers.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_19","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>target</code> a file handle used for I/O operations <code>target_offset</code> 64-bit signless integer <code>length</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_16","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#miscellaneous-ops","title":"Miscellaneous ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamreturn-streamreturnop","title":"<code>stream.return</code> (Stream::ReturnOp)","text":"<p>Returns results from a region</p> <p>Syntax:</p> <pre><code>operation ::= `stream.return` attr-dict\n              $operands `:` type($operands)\n</code></pre> <p>The values returned are copied by-value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, 
<code>HasParent&lt;IREE::Stream::ExecutableExportOp&gt;</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_20","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamyield-streamyieldop","title":"<code>stream.yield</code> (Stream::YieldOp)","text":"<p>Yields stream values from an execution region</p> <p>Syntax:</p> <pre><code>operation ::= `stream.yield` attr-dict\n              ($resource_operands^ `:`\n              custom&lt;ShapedTypeList&gt;(type($resource_operands),\n              $resource_operand_sizes))?\n</code></pre> <p>The values returned represent the asynchronous value at the point in time the SSA value is defined (or tied).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>HasParent&lt;IREE::Stream::AsyncExecuteOp, IREE::Stream::AsyncConcurrentOp, IREE::Stream::CmdExecuteOp, IREE::Stream::CmdSerialOp, IREE::Stream::CmdConcurrentOp&gt;</code>, <code>SameVariadicOperandSize</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_21","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#pseudo-ops","title":"Pseudo Ops","text":"<p>Pseudo ops for conversion 
support.</p>"},{"location":"reference/mlir-dialects/Stream/#streamtensorexport-streamtensorexportop","title":"<code>stream.tensor.export</code> (Stream::TensorExportOp)","text":"<p>Conversion placeholder for stream-&gt;other type conversion</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.export` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion to a higher-level dialect type such as <code>tensor</code> that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_22","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_17","title":"Results:","text":"Result Description <code>result</code> any 
type"},{"location":"reference/mlir-dialects/Stream/#streamtensorimport-streamtensorimportop","title":"<code>stream.tensor.import</code> (Stream::TensorImportOp)","text":"<p>Conversion placeholder for other-&gt;stream type conversion</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.import` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              type($source)\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion from a higher-level dialect type such as <code>tensor</code> that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_23","title":"Operands:","text":"Operand Description <code>source</code> any type <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_18","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource or staging 
resource"},{"location":"reference/mlir-dialects/Stream/#resource-ops","title":"Resource ops","text":"<p>Generic resource ops.</p>"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloc-streamresourceallocop","title":"<code>stream.resource.alloc</code> (Stream::ResourceAllocOp)","text":"<p>Allocates a persistent resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.alloc` (`on` `(` $affinity^ `)`)?\n              (`uninitialized` $uninitialized^)?\n              attr-dict `:`\n              type($result) `{` $storage_size `}`\n</code></pre> <p>Allocates a persistent value (one that is long-lived and possibly external to the program) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use <code>discard</code> access.</p> <p>Uninitialized allocations will have undefined contents and must only be used when all bytes are discarded prior to any reads. Runtimes decide what \"undefined contents\" means and here it only indicates that execution will be correct even if the memory starts with non-zero values.</p> <p>If multiple values are allocated from the same operation it implies that they have matching lifetimes. When lowering to execution environments the separate allocations may be fused into one or more slab allocations in order to reduce overheads. 
How many allocations can be fused is based on the size of the individual resources and the target constraints (how large any single buffer may be, etc).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>uninitialized</code>::mlir::UnitAttrunit attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_24","title":"Operands:","text":"Operand Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_19","title":"Results:","text":"Result Description <code>result</code> any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloca-streamresourceallocaop","title":"<code>stream.resource.alloca</code> (Stream::ResourceAllocaOp)","text":"<p>Allocates a transient value with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.alloca` `uninitialized`\n              (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`):(`:`)?\n              attr-dict\n              type($result) `{` $storage_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n</code></pre> <p>Allocates a transient value (one that is short-lived and local to the current computation) with undefined contents. 
Consumers of the allocated result must assume nothing of the contents and use <code>discard</code> access.</p> <p>The resource returned is not valid for use until the timepoint is reached; execution using this resource must await on the timepoint.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_25","title":"Operands:","text":"Operand Description <code>storage_size</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_20","title":"Results:","text":"Result Description <code>result</code> any stream-compatible type <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceconstants-streamresourceconstantsop","title":"<code>stream.resource.constants</code> (Stream::ResourceConstantsOp)","text":"<p>Asynchronously uploads or maps constant values</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.constants` (`on` `(` $affinity^ `)`)?\n              attr-dict `:`\n              custom&lt;ConstantValueList&gt;(type($results),\n              $result_sizes,\n              $values)\n              `\\n` ` ` ` ` `=` `` `&gt;` type($result_timepoint)\n</code></pre> <p>Represents an upload of constant resources that may be packed, suballocated, and mapped depending on the final lowering 
target.</p> <p>In runtime environments where memory is shared between host and device this turns into a mapping operation that avoids additional memory allocation and copies. When memory cannot be shared an asynchronous stream will be created to allocate and copy all of the constant values.</p> <p>Though this op returns a unique resource for each constant value it's expected that almost all end up aliasing into the same storage. The exact packing and number of storage resources that are needed are not known until lowering to a particular backend, though, so they are separate here for proper usage tracking.</p> <p>Both constant and variable resources can be produced; a constant is immutable while a variable will be treated as a constant-value initializer for a mutable resource. By modeling these together it's not required that variable initializers first be allocated, copied to the target, and then copied into the variable storage if the target is capable of doing a direct upload or mapping.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameVariadicResultSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>values</code>::mlir::ArrayAttrconstant value array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_26","title":"Operands:","text":"Operand Description <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_21","title":"Results:","text":"Result Description <code>results</code> variadic of constant resource or variable resource 
<code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourcedealloca-streamresourcedeallocaop","title":"<code>stream.resource.dealloca</code> (Stream::ResourceDeallocaOp)","text":"<p>Frees a transient value when available</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.dealloca` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              $operand `:` type($operand) `{` $operand_size `}`\n              `=` `` `&gt;` type($result_timepoint)\n              attr-dict\n</code></pre> <p>Deallocates a transient value (one that is short-lived and local to the current computation) previously allocated using <code>stream.resource.alloca</code>.</p> <p>The resource is considered live and valid until the provided timepoint is reached and the memory is only made available for future requests after the result timepoint is reached.</p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_27","title":"Operands:","text":"Operand Description <code>operand</code> any stream-compatible type <code>operand_size</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_22","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution 
availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceload-streamresourceloadop","title":"<code>stream.resource.load</code> (Stream::ResourceLoadOp)","text":"<p>Loads a value from a staging resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.load` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element(s) at the given offset in the staging resource. The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be transferred.</p> <p>Interfaces: <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_28","title":"Operands:","text":"Operand Description <code>source</code> staging resource <code>source_size</code> index <code>source_offset</code> index"},{"location":"reference/mlir-dialects/Stream/#results_23","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcepack-streamresourcepackop","title":"<code>stream.resource.pack</code> (Stream::ResourcePackOp)","text":"<p>Packs variable-sized slices into a single slab</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.pack` (`on` `(` $affinity^ `)`)?\n              (`offset` `(` $offset^ `)`)?\n              `slices` `(` `{`\n              custom&lt;PackSliceRanges&gt;($lifetime_intervals,\n              $dynamic_slice_sizes,\n              type($packed_offsets))\n              `}` `)`\n              `:` type($total_length)\n              attr-dict-with-keyword\n</code></pre> <p>Performs a greedy packing of one or more sized slices with specified lifetimes and returns their relative offsets in an aliased linear space.</p> <p>Slices are <code>[start, end] = 
%slice_byte_size</code>, where the start and end values define an inclusive lifetime range and the size is the total number of bytes required to be live for that range.</p> <pre><code>// Computes the total length required for the packed values and the offsets\n// of the 3 slices requested relative to the base of the packed memory:\n%total_length, %offset_0, %offset_1, %offset_2 =\n    stream.resource.pack\n        // Each slice gets one result offset:\n        slices({\n          // 3 slices where A and B overlap and will get unique offsets\n          // while B and C do not overlap and are allowed to alias.\n          [0, 10] = %size_0,  // A =&gt; %offset_0\n          [3,  8] = %size_1,  // B =&gt; %offset_1\n          [9, 10] = %size_2,  // C =&gt; %offset_2\n          ...\n        }) : index\n</code></pre> <p>The lifetime start and end points (inclusive) are only used for relative comparisons and may originate with any meaning (op order in block, epoch, phase of the moon, etc). The packing algorithm uses the intervals to determine slice liveness and when aliasing is safe.</p> <p>The size of each slice may either be a constant or runtime-computed dynamic value. Constant slices can achieve more dense packing than the dynamic values and CSE/canonicalization should be applied to ensure that as many of the dynamic values as possible are equivalent.</p> <p>The total length required to pack all slices is returned and can be used to acquire storage. The individual slice offsets are 0-based and as such, if they are directly used as buffer offsets, they may need additional offsetting. 
This can either be applied via the optional <code>offset</code> operand or slicing of the underlying allocation buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>lifetime_intervals</code>::mlir::ArrayAttrindex array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_29","title":"Operands:","text":"Operand Description <code>offset</code> index <code>dynamic_slice_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_24","title":"Results:","text":"Result Description <code>total_length</code> index <code>packed_offsets</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamresourcesize-streamresourcesizeop","title":"<code>stream.resource.size</code> (Stream::ResourceSizeOp)","text":"<p>Returns the size of the resource storage in bytes</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.size` (`on` `(` $affinity^ `)`)?\n              $operand\n              attr-dict `:` type($operand)\n</code></pre> <p>Returns a possibly runtime-dynamic byte size of the resource backing storage. 
This may differ from the logical storage size of a value based on the alignment requirements of the target as well as encoding of higher level values such as sparse tensor formats.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_30","title":"Operands:","text":"Operand Description <code>operand</code> any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#results_25","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamresourcestore-streamresourcestoreop","title":"<code>stream.resource.store</code> (Stream::ResourceStoreOp)","text":"<p>Stores a value into a staging resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.store` $value `,`\n              $target `[` $target_offset `]` `:`\n              type($value)\n              `-&gt;`\n              type($target) `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be acquired.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_31","title":"Operands:","text":"Operand 
Description <code>target</code> staging resource <code>target_size</code> index <code>target_offset</code> index <code>value</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcesubview-streamresourcesubviewop","title":"<code>stream.resource.subview</code> (Stream::ResourceSubviewOp)","text":"<p>Slices out a cloned subview of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.subview` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Aliases a byte subrange of a resource.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>StreamableOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code>, <code>Util_SubrangeOp</code>, <code>ViewLikeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_32","title":"Operands:","text":"Operand Description <code>source</code> any stream-compatible type <code>source_size</code> index <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_26","title":"Results:","text":"Result Description <code>result</code> any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcetry_map-streamresourcetrymapop","title":"<code>stream.resource.try_map</code> (Stream::ResourceTryMapOp)","text":"<p>Maps read-only memory into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.try_map` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `]` `:`\n              type($source)\n              `-&gt;`\n              
type($did_map) `,` type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Synchronously maps a host heap buffer into a stream-accessible resource with the requested lifetime. If the given source cannot be mapped the <code>did_map</code> result will be 0 and users must find another route into memory (such as file I/O). The resulting resource is not coherent with the source and behavior is undefined if the underlying contents change.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_33","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_27","title":"Results:","text":"Result Description <code>did_map</code> 1-bit signless integer <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#resource-parameter-io-ops","title":"Resource parameter I/O ops","text":"<p>Resource parameter I/O ops.</p>"},{"location":"reference/mlir-dialects/Stream/#streamparametergather-streamparametergatherop","title":"<code>stream.parameter.gather</code> (Stream::ParameterGatherOp)","text":"<p>Gathers multiple 
resources from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.gather` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `{`\n              custom&lt;ParameterGatherOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              $target, type($target), $target_size, $target_offsets, $target_lengths)\n              `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously gathers one or more resources into a single target stream resource. This is equivalent to one <code>stream.parameter.read</code> per parameter but allows implementations that can batch operations to do so without additional timeline overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_34","title":"Operands:","text":"Operand Description <code>source_offsets</code> variadic of 64-bit signless integer <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offsets</code> variadic of index <code>target_lengths</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_28","title":"Results:","text":"Result Description 
<code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterload-streamparameterloadop","title":"<code>stream.parameter.load</code> (Stream::ParameterLoadOp)","text":"<p>Reads one or more resources from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.load` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `{`\n              custom&lt;ParameterLoadOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              type($results), $result_sizes)\n              `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads one or more resources from an external parameter provider and returns the resulting stream resources. Depending on the resource type this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if <code>stream.resource.alloca</code> and <code>stream.parameter.read</code> had been used per parameter.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_35","title":"Operands:","text":"Operand 
Description <code>source_offsets</code> variadic of 64-bit signless integer <code>result_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_29","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterread-streamparameterreadop","title":"<code>stream.parameter.read</code> (Stream::ParameterReadOp)","text":"<p>Reads a resource from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.read` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              custom&lt;ParameterReference&gt;($source_scope, $source_key)\n              `` `[` $source_offset `]` `-&gt;`\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads a resource from an external parameter provider into the provided target resource range.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_key</code>::mlir::StringAttrstring attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_36","title":"Operands:","text":"Operand 
Description <code>source_offset</code> 64-bit signless integer <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_30","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterscatter-streamparameterscatterop","title":"<code>stream.parameter.scatter</code> (Stream::ParameterScatterOp)","text":"<p>Scatters multiple resources to a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.scatter` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `{`\n              custom&lt;ParameterScatterOperations&gt;(\n              $source, type($source), $source_size, $source_offsets, $source_lengths,\n              $target_scope, $target_keys, $target_offsets)\n              `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously scatters one or more resources from a single source resource into one or more parameters. 
This is equivalent to one <code>stream.parameter.write</code> per parameter but allows implementations that can batch operations to do so without additional overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_scope</code>::mlir::StringAttrstring attribute <code>target_keys</code>::mlir::ArrayAttrstring array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_37","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offsets</code> variadic of index <code>source_lengths</code> variadic of index <code>target_offsets</code> variadic of 64-bit signless integer <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_31","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterwrite-streamparameterwriteop","title":"<code>stream.parameter.write</code> (Stream::ParameterWriteOp)","text":"<p>Writes a resource to a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.write` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              $source `[` $source_offset `for` $source_length `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              custom&lt;ParameterReference&gt;($target_scope, 
$target_key)\n              `` `[` $target_offset `]`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously writes a resource to an external parameter provider from the provided source resource range.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_scope</code>::mlir::StringAttrstring attribute <code>target_key</code>::mlir::StringAttrstring attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_38","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_length</code> index <code>target_offset</code> 64-bit signless integer <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_32","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#resource-transfer-ops","title":"Resource transfer ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasyncalloca-streamasyncallocaop","title":"<code>stream.async.alloca</code> (Stream::AsyncAllocaOp)","text":"<p>Allocates a transient value with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.alloca` (`on` `(` $affinity^ `)`)?\n              attr-dict `:` type($result) `{` $storage_size `}`\n</code></pre> <p>Allocates a transient value (one that 
is short-lived and local to the current computation) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use <code>discard</code> access.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_39","title":"Operands:","text":"Operand Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_33","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncclone-streamasynccloneop","title":"<code>stream.async.clone</code> (Stream::AsyncCloneOp)","text":"<p>Clones the contents of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.clone` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. 
Acts as a copy-on-write operation.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_40","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_34","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccollective-streamasynccollectiveop","title":"<code>stream.async.collective</code> (Stream::AsyncCollectiveOp)","text":"<p>Performs a collective operation</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.collective` `` $op `` `[` $element_count `]`\n              (`on` `(` $affinity^ `)`)?\n              `channel` `(` $channel `)`\n              custom&lt;CollectiveParam&gt;(ref($op), $param) ``\n              $source `[` $source_offset `to` $source_end `for` $source_length `]` `,`\n              $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>TODO: document different usage. 
For now this should be considered a prototype, and the modeling of collective operations may change in the future to better ensure in-place operations (where send/recv is a subset of recv/send). We may have dedicated operations for the send and recv verbs as they have sequencing implications - or we could add optional sequencing to this base op.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>op</code>::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_41","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>target_length</code> index <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_end</code> index <code>source_length</code> index <code>element_count</code> index <code>channel</code> a collective communication channel <code>param</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_35","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant 
resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncconstant-streamasyncconstantop","title":"<code>stream.async.constant</code> (Stream::AsyncConstantOp)","text":"<p>Defines a constant resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.constant` (`on` `(` $affinity^ `)`)?\n              `:`\n              type($result) `` `{` $result_size `}`\n              `=`\n              $value\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new resource with the given constant value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::Attributeany attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_42","title":"Operands:","text":"Operand Description <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_36","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccopy-streamasynccopyop","title":"<code>stream.async.copy</code> (Stream::AsyncCopyOp)","text":"<p>Copies a subview of a stream resource to another</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.copy` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `to` $source_end `]` `,`\n              $target `[` $target_offset `to` 
$target_end `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Copies a subview of a resource into a subview of another. As with memcpy this does not support overlapping updates into the same resource. Unlike <code>stream.async.update</code> copy sources cannot be allocated in-place.</p> <p>Equivalent to a stream.async.slice + stream.async.update.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_43","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_end</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Stream/#results_37","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncdispatch-streamasyncdispatchop","title":"<code>stream.async.dispatch</code> (Stream::AsyncDispatchOp)","text":"<p>Dispatches a parallelized grid of 
work</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.dispatch` (`on` `(` $affinity^ `)`)?\n              custom&lt;DispatchEntryPoints&gt;($entry_points)\n              (`[` $workload^ `]`)? ``\n              custom&lt;DispatchOperands&gt;($resource_operands,\n              $resource_operand_offsets,\n              $resource_operand_ends,\n              $resource_operand_lengths) attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($resource_operands),\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands)\n</code></pre> <p>Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_points</code>::mlir::ArrayAttrsymbol ref array attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_44","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type <code>resource_operand_sizes</code> variadic of index <code>resource_operand_offsets</code> variadic of index <code>resource_operand_ends</code> 
variadic of index <code>resource_operand_lengths</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_38","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncfill-streamasyncfillop","title":"<code>stream.async.fill</code> (Stream::AsyncFillOp)","text":"<p>Fills a subview of a stream resource with a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.fill` (`on` `(` $affinity^ `)`)?\n              $value `,`\n              $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n              type($value) `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Splats a value into a subview of the given stream resource and returns the resource with the update applied.</p> <p>Equivalent to a stream.async.splat + stream.async.update.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_45","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>target_length</code> index <code>value</code> 8-bit signless integer or 16-bit 
signless integer or 32-bit signless integer or 64-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_39","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncload-streamasyncloadop","title":"<code>stream.async.load</code> (Stream::AsyncLoadOp)","text":"<p>Loads a value from a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.load` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the resource.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_46","title":"Operands:","text":"Operand Description <code>source</code> staging resource <code>source_size</code> index <code>source_offset</code> index"},{"location":"reference/mlir-dialects/Stream/#results_40","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamasyncslice-streamasyncsliceop","title":"<code>stream.async.slice</code> (Stream::AsyncSliceOp)","text":"<p>Slices out a cloned subview of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.slice` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `to` $source_end `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              
attr-dict-with-keyword\n</code></pre> <p>Slices a subrange of a stream resource based on a byte range. Acts as a copy-on-write operation.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_47","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_end</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_41","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncsplat-streamasyncsplatop","title":"<code>stream.async.splat</code> (Stream::AsyncSplatOp)","text":"<p>Splats a value into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.splat` (`on` `(` $affinity^ `)`)?\n              $value `:` type($value) `-&gt;` type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new resource with the given primitive value splatted out to fill the entire contents.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, 
<code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_48","title":"Operands:","text":"Operand Description <code>value</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_42","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncstore-streamasyncstoreop","title":"<code>stream.async.store</code> (Stream::AsyncStoreOp)","text":"<p>Stores a value into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.store` $value `,`\n              $target `[` $target_offset `]` `:`\n              type($value)\n              `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a resource with the element at the given offset set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_49","title":"Operands:","text":"Operand Description <code>target</code> staging resource <code>target_size</code> index <code>target_offset</code> index <code>value</code> index or integer or 
floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_43","title":"Results:","text":"Result Description <code>result</code> staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasynctransfer-streamasynctransferop","title":"<code>stream.async.transfer</code> (Stream::AsyncTransferOp)","text":"<p>Transfers a resource from one location/state to another</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.transfer` (`from` `(` $source_affinity^ `)`)?\n              $source `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              (`to` `(` $result_affinity^ `)`)?\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Transfers a resource between different states (such as a <code>staging</code> lifetime to a <code>local</code> lifetime) or different affinities. This is roughly equivalent to a cast but may have special semantics when later lowered to one or more devices with discrete memory spaces or pools.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity <code>result_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_50","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>source_size</code> index <code>result_size</code> 
index"},{"location":"reference/mlir-dialects/Stream/#results_44","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncupdate-streamasyncupdateop","title":"<code>stream.async.update</code> (Stream::AsyncUpdateOp)","text":"<p>Updates a slice of a subview of a resource in-place</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.update` (`on` `(` $affinity^ `)`)?\n              $update `,`\n              $target `[` $target_offset `to` $target_end `]` `:`\n              type($update) `` `{` $update_size `}` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Copies a value into a resource based on a byte range. The returned value is the entire updated target value. Updates can be turned into placement allocations and avoid copies.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_51","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>update</code> resource or external resource or transient resource or variable resource or constant resource <code>update_size</code> 
index"},{"location":"reference/mlir-dialects/Stream/#results_45","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#synchronization-ops","title":"Synchronization ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtimepointawait-streamtimepointawaitop","title":"<code>stream.timepoint.await</code> (Stream::TimepointAwaitOp)","text":"<p>Awaits a timepoint before returning a set of resources</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.await` (`on` `(` $affinity^ `)`)?\n              $await_timepoint `=` `` `&gt;`\n              $resource_operands `:`\n              custom&lt;ShapedTypeList&gt;(type($resource_operands),\n              type($results), $resource_operand_sizes)\n              attr-dict-with-keyword\n</code></pre> <p>After asynchronous execution scheduling resources may exist in different states at different points in the execution timeline. This op enables resolving the version of a resource after a particular point in the timeline. 
As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_52","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_46","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtimepointbarrier-streamtimepointbarrierop","title":"<code>stream.timepoint.barrier</code> (Stream::TimepointBarrierOp)","text":"<p>Returns a timepoint indicating when a resource is available</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.barrier` (`on` `(` $affinity^ `)`)?\n              $resource `:` type($resource) `` `{` $resource_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>After asynchronous execution scheduling resources may 
exist in different states at different points in the execution timeline. This op enables identifying when the version of a resource after a particular point in the timeline is available. As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_53","title":"Operands:","text":"Operand Description <code>resource</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_47","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointchain_external-streamtimepointchainexternalop","title":"<code>stream.timepoint.chain_external</code> (Stream::TimepointChainExternalOp)","text":"<p>Exports a timepoint to an external dialect type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.chain_external` (`on` `(` $affinity^ `)`)?\n              $await_timepoint\n              `=` `` `&gt;`\n              `(` $external_values 
`:` type($external_values) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion to an external dialect type such as <code>hal.fence</code> that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Interfaces: <code>Stream_AffinityOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_48","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_54","title":"Operands:","text":"Operand Description <code>await_timepoint</code> a timepoint indicating execution availability <code>external_values</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointexport-streamtimepointexportop","title":"<code>stream.timepoint.export</code> (Stream::TimepointExportOp)","text":"<p>Exports a timepoint to an external dialect type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.export` (`on` `(` $affinity^ `)`)?\n              $await_timepoint\n              `=` `` `&gt;`\n              `(` type($results) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion to an external dialect type such as <code>hal.fence</code> that is resolved during lowering into the stream dialect. 
This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_49","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_55","title":"Operands:","text":"Operand Description <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_48","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimmediate-streamtimepointimmediateop","title":"<code>stream.timepoint.immediate</code> (Stream::TimepointImmediateOp)","text":"<p>Returns an immediately-available timepoint</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.immediate` attr-dict\n              `=` `` `&gt;` type($result_timepoint)\n</code></pre> <p>Timepoints indicate a point in the execution timeline and this op can be used to get a placeholder representing the start of the timeline. Any waits on the returned timepoint will resolve immediately. 
This generally folds away but can be useful if needing to initialize globals or branch args.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#results_49","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimport-streamtimepointimportop","title":"<code>stream.timepoint.import</code> (Stream::TimepointImportOp)","text":"<p>Imports a timepoint from an external dialect type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.import` (`on` `(` $affinity^ `)`)?\n              $operands `:` `(` type($operands) `)`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion from an external dialect type such as <code>hal.semaphore</code> that is resolved during lowering into the stream dialect. 
This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_50","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_56","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#results_50","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointjoin-streamtimepointjoinop","title":"<code>stream.timepoint.join</code> (Stream::TimepointJoinOp)","text":"<p>Joins one or more timepoints into the max of all of them</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.join` `max` `(` $await_timepoints `)` `=` `` `&gt;` type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a timepoint that indicates that all of the input timepoints have been reached.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_57","title":"Operands:","text":"Operand Description <code>await_timepoints</code> variadic of a timepoint indicating execution 
availability"},{"location":"reference/mlir-dialects/Stream/#results_51","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtensorclone-streamtensorcloneop","title":"<code>stream.tensor.clone</code> (Stream::TensorCloneOp)","text":"<p>Clones the contents of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.clone` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. 
Acts as a copy-on-write operation.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_51","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_58","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_52","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorconstant-streamtensorconstantop","title":"<code>stream.tensor.constant</code> (Stream::TensorConstantOp)","text":"<p>Defines a constant tensor value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.constant` (`on` `(` $affinity^ `)`)?\n              `:`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result)\n              `=`\n              $value\n              attr-dict-with-keyword\n</code></pre> <p>Returns a typed resource initialized 
to the given constant value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_52","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::Attributeany attribute <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_59","title":"Operands:","text":"Operand Description <code>result_encoding_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_53","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorempty-streamtensoremptyop","title":"<code>stream.tensor.empty</code> (Stream::TensorEmptyOp)","text":"<p>Defines an empty tensor value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.empty` (`on` `(` $affinity^ `)`)?\n              `:`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a typed resource initialized with no contents. This still carries shape metadata and may encode to a non-empty resource such as in cases where the empty representation still has data (e.g. sparse tensors). 
Subsequent writes must populate any ranges of the tensor that are later read.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_53","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_60","title":"Operands:","text":"Operand Description <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_54","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorfill-streamtensorfillop","title":"<code>stream.tensor.fill</code> (Stream::TensorFillOp)","text":"<p>Fills a subview of a stream resource with a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.fill` (`on` `(` $affinity^ `)`)?\n              $value `,` $target `[` $start_indices `for` $lengths `]` `:`\n              type($value)\n              `-&gt;`\n              $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n              `in`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Splats a value into a subview of the given stream resource and returns the resource with the update applied.</p> <p>Equivalent to a 
stream.tensor.splat + stream.tensor.update.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_54","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_61","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_encoding_dims</code> variadic of index <code>target_size</code> index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>value</code> index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#results_55","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorload-streamtensorloadop","title":"<code>stream.tensor.load</code> (Stream::TensorLoadOp)","text":"<p>Loads a value from a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.load` $source (`[` $indices^ `]`)? 
`:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_55","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_62","title":"Operands:","text":"Operand Description <code>source</code> staging resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_56","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamtensorsizeof-streamtensorsizeofop","title":"<code>stream.tensor.sizeof</code> (Stream::TensorSizeOfOp)","text":"<p>Calculates the storage size of a given high-level type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.sizeof` (`on` `(` $affinity^ `)`)?\n              $encoding (`{` $encoding_dims^ `}`)?\n              attr-dict `:` type($storage_size)\n</code></pre> <p>Target-dependent storage size calculation using a high-level annotated type. While within the stream dialect the storage size of a value is left as a placeholder using this op. 
The requisite target-specific parameters for expanding the size calculation are only available after affinities have been assigned.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_56","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_63","title":"Operands:","text":"Operand Description <code>encoding_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_57","title":"Results:","text":"Result Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Stream/#streamtensorslice-streamtensorsliceop","title":"<code>stream.tensor.slice</code> (Stream::TensorSliceOp)","text":"<p>Slices out a cloned subview of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.slice` (`on` `(` $affinity^ `)`)?\n              $source `[` $start_indices `for` $lengths `]` `:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Slices a subrange of a stream resource based on a tensor encoding. 
Acts as a copy-on-write operation.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_57","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_64","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_58","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorsplat-streamtensorsplatop","title":"<code>stream.tensor.splat</code> (Stream::TensorSplatOp)","text":"<p>Splats a value into a shaped tensor</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.splat` (`on` `(` $affinity^ `)`)?\n              $value\n              `:` type($value)\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` 
`{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a typed resource initialized to the given primitive value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_58","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_65","title":"Operands:","text":"Operand Description <code>value</code> index or integer or floating-point or complex-type <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_59","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorstore-streamtensorstoreop","title":"<code>stream.tensor.store</code> (Stream::TensorStoreOp)","text":"<p>Stores a value into a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.store` $value `,`\n              $target (`[` $indices^ `]`)? 
`:`\n              type($value)\n              `-&gt;`\n              $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n              `in`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with the element at the given index set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_59","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_66","title":"Operands:","text":"Operand Description <code>target</code> staging resource <code>target_encoding_dims</code> variadic of index <code>target_size</code> index <code>indices</code> variadic of index <code>value</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_60","title":"Results:","text":"Result Description <code>result</code> staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtensortrace-streamtensortraceop","title":"<code>stream.tensor.trace</code> (Stream::TensorTraceOp)","text":"<p>Traces one or more tensor values at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.trace` $key `=` `[`\n              custom&lt;EncodedResourceOperands&gt;(\n              $resources, type($resources), $resource_sizes,\n              $resource_encodings, $resource_encoding_dims)\n              `]` attr-dict-with-keyword\n</code></pre> 
<p>Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ShapeAwareOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_60","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute <code>resource_encodings</code>::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_67","title":"Operands:","text":"Operand Description <code>resources</code> variadic of staging resource <code>resource_sizes</code> variadic of index <code>resource_encoding_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamtensorupdate-streamtensorupdateop","title":"<code>stream.tensor.update</code> (Stream::TensorUpdateOp)","text":"<p>Updates a slice of a subview of a resource in-place</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.update` (`on` `(` $affinity^ `)`)?\n              $update `,` $target `[` $start_indices `]` `:`\n              $update_encoding (`` `{` $update_encoding_dims^ `}`)?\n              `in`\n              type($update) `` `{` $update_size `}`\n              `-&gt;`\n              $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n              `in`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Copies a value into a resource based on tensor encodings. 
The returned value is the entire updated target value.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_61","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute <code>update_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_68","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_encoding_dims</code> variadic of index <code>target_size</code> index <code>start_indices</code> variadic of index <code>update</code> resource or external resource or transient resource or variable resource or constant resource <code>update_encoding_dims</code> variadic of index <code>update_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_61","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#attributes_62","title":"Attributes","text":""},{"location":"reference/mlir-dialects/Stream/#collectiveattr","title":"CollectiveAttr","text":"<p>collective operation and specification</p> <p>Syntax:</p> <pre><code>#stream.collective&lt;\n  CollectiveKind,   # kind\n  std::optional&lt;CollectiveReductionOp&gt;,   # reduction\n  CollectiveElementType   # element_type\n&gt;\n</code></pre> <p>Specifies the collective operation to perform and any mode bits 
required.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters","title":"Parameters:","text":"Parameter C++ type Description kind <code>CollectiveKind</code> reduction <code>std::optional&lt;CollectiveReductionOp&gt;</code> element_type <code>CollectiveElementType</code>"},{"location":"reference/mlir-dialects/Stream/#namedparameterattr","title":"NamedParameterAttr","text":"<p>named parameter referenced by an optional scope and key</p> <p>Syntax:</p> <pre><code>#stream.parameter.named&lt;\n  ::mlir::Type,   # type\n  StringAttr,   # scope\n  StringAttr,   # key\n  DictionaryAttr   # config\n&gt;\n</code></pre> <p>Specifies an externally-defined parameter that can be referenced by an optional scope defining a set of parameters and a key uniquely identifying the parameter within its scope.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code> scope <code>StringAttr</code> key <code>StringAttr</code> config <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/Stream/#partitioningconfigattr","title":"PartitioningConfigAttr","text":"<p>defines partitioning configuration</p> <p>Configures the partitioning algorithm to use and its configuration. Partitioning is useful to adjust when scheduling behavior of targets is radically different - such as single-threaded vs. multi-threaded CPUs or bespoke ML accelerators vs. general purpose GPUs. This mechanism controls the amount of concurrency, parallelism, memory consumption, and latency.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description favor <code>IREE::Stream::FavorAttr</code>"},{"location":"reference/mlir-dialects/Stream/#resourceconfigattr","title":"ResourceConfigAttr","text":"<p>defines resource constraints configuration</p> <p>Defines resource storage constraints. 
These allow for packing and layout algorithms to ensure they are producing usable results on target devices.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description maxAllocationSize <code>int64_t</code> minBufferOffsetAlignment <code>int64_t</code> maxBufferRange <code>int64_t</code> minBufferRangeAlignment <code>int64_t</code> indexBits <code>int64_t</code> aliasMutableBindings <code>bool</code> memoryModel <code>IREE::Stream::MemoryModel</code>"},{"location":"reference/mlir-dialects/Stream/#timepointattr","title":"TimepointAttr","text":"<p>an immediately-resolved timepoint</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code>"},{"location":"reference/mlir-dialects/Stream/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/Stream/#constant-resource","title":"constant resource","text":"<p>Stream constants are immutable values that are available for the lifetime of the program once initialized.</p>"},{"location":"reference/mlir-dialects/Stream/#external-resource","title":"external resource","text":"<p>Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#staging-resource","title":"staging resource","text":"<p>Stream upload/download staging resource. 
These are used outside of streams and then transferred to other stream resources such as variables or transients for use inside of streams. Dispatches and several other operations cannot directly operate on these resources.</p>"},{"location":"reference/mlir-dialects/Stream/#transient-resource","title":"transient resource","text":"<p>Stream transients represent asynchronously-available and sequenced values that have a short lifetime - often only passed between stream executions. It is expected that transient values are not stored in global state and have minimal lifetime as they may be heavily pooled or suballocated.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#resource","title":"resource","text":"<p>A stream resource that has not yet had its lifetime calculated.</p>"},{"location":"reference/mlir-dialects/Stream/#variable-resource","title":"variable resource","text":"<p>Stream variables represent asynchronously-available and sequenced values that have a long lifetime relative to the work being performed on them. These variables are often stored in global state and may live for the entire duration of the program.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/Stream/#bindingtype","title":"BindingType","text":"<p>a managed resource binding into an executable scope</p> <p>Syntax: <code>!stream.binding</code></p> <p>A resource binding available within an executable dispatch function. 
The bindings map 1:1 with the resources bound during dispatch operations.</p>"},{"location":"reference/mlir-dialects/Stream/#channeltype","title":"ChannelType","text":"<p>a collective communication channel</p> <p>Syntax: <code>!stream.channel</code></p> <p>Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.</p> <p>In programs that model SPMD behavior internally channels can be created or provided by hosting applications. For example, the program could expose a <code>@set_channels(!util.list&lt;!stream.channel&gt;)</code> method that stores the channels in globals for use throughout the program allowing for application-controlled channel configuration.</p>"},{"location":"reference/mlir-dialects/Stream/#filetype","title":"FileType","text":"<p>a file handle used for I/O operations</p> <p>Syntax: <code>!stream.file</code></p> <p>A file handle that can be asynchronously read and written into/from stream resources.</p>"},{"location":"reference/mlir-dialects/Stream/#resourcetype","title":"ResourceType","text":"<p>a managed resource</p> <p>Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. 
If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description lifetime <code>IREE::Stream::Lifetime</code>"},{"location":"reference/mlir-dialects/Stream/#timepointtype","title":"TimepointType","text":"<p>a timepoint indicating execution availability</p> <p>Syntax: <code>!stream.timepoint</code></p> <p>Represents a point in the execution timeline that when resolved indicates that all of the execution prior to this timepoint has completed and the results of the execution are available for use. This includes transitive dependencies as well; if timepoint B is dependent on timepoint A then when B is available so too must be A.</p>"},{"location":"reference/mlir-dialects/Util/","title":"Util","text":""},{"location":"reference/mlir-dialects/Util/#util-dialect","title":"'util' Dialect","text":"<p>A dialect used for types common across IREE subdialects.</p> <ul> <li>'util' Dialect<ul> <li>Operations<ul> <li>Address/offset arithmetic ops<ul> <li>util.align (Util::AlignOp)</li> <li>util.sizeof (Util::SizeOfOp)</li> </ul> </li> <li>Buffer ops<ul> <li>util.buffer.alloc (Util::BufferAllocOp)</li> <li>util.buffer.compare (Util::BufferCompareOp)</li> <li>util.buffer.constant (Util::BufferConstantOp)</li> <li>util.buffer.copy (Util::BufferCopyOp)</li> <li>util.buffer.dealloc (Util::BufferDeallocOp)</li> <li>util.buffer.fill (Util::BufferFillOp)</li> <li>util.buffer.hash (Util::BufferHashOp)</li> <li>util.buffer.load (Util::BufferLoadOp)</li> <li>util.buffer.size (Util::BufferSizeOp)</li> <li>util.buffer.slice (Util::BufferSliceOp)</li> <li>util.buffer.storage (Util::BufferStorageOp)</li> <li>util.buffer.store (Util::BufferStoreOp)</li> <li>util.buffer.subspan (Util::BufferSubspanOp)</li> </ul> </li> <li>Compiler hint ops<ul> <li>util.optimization_barrier (Util::OptimizationBarrierOp)</li> 
<li>util.unfoldable_constant (Util::UnfoldableConstantOp)</li> <li>util.unreachable (Util::UnreachableOp)</li> </ul> </li> <li>Data type conversion ops<ul> <li>util.numeric.optional_narrow (Util::NumericOptionalNarrowOp)</li> </ul> </li> <li>Global ops<ul> <li>util.global.address (Util::GlobalAddressOp)</li> <li>util.global.load.indirect (Util::GlobalLoadIndirectOp)</li> <li>util.global.load (Util::GlobalLoadOp)</li> <li>util.global (Util::GlobalOp)</li> <li>util.global.store.indirect (Util::GlobalStoreIndirectOp)</li> <li>util.global.store (Util::GlobalStoreOp)</li> </ul> </li> <li>List ops<ul> <li>util.list.create (Util::ListCreateOp)</li> <li>util.list.get (Util::ListGetOp)</li> <li>util.list.resize (Util::ListResizeOp)</li> <li>util.list.set (Util::ListSetOp)</li> <li>util.list.size (Util::ListSizeOp)</li> </ul> </li> <li>Range arithmetic ops<ul> <li>util.range.extents (Util::RangeExtentsOp)</li> <li>util.range.max (Util::RangeMaxOp)</li> <li>util.range.min (Util::RangeMinOp)</li> </ul> </li> <li>Status ops<ul> <li>util.status.check_ok (Util::StatusCheckOkOp)</li> </ul> </li> <li>Structural ops<ul> <li>util.call (Util::CallOp)</li> <li>util.func (Util::FuncOp)</li> <li>util.initializer (Util::InitializerOp)</li> <li>util.return (Util::ReturnOp)</li> </ul> </li> <li>Type manipulation ops<ul> <li>util.cast (Util::CastOp)</li> <li>util.cmp.eq (Util::CmpEQOp)</li> <li>util.null (Util::NullOp)</li> </ul> </li> <li>Value utility ops<ul> <li>util.switch (Util::SwitchOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>BytePatternAttr</li> <li>ByteRangeAttr</li> <li>CompositeAttr</li> <li>InlineAlwaysAttr</li> <li>InlineNeverAttr</li> <li>UninitializedAttr</li> </ul> </li> <li>Types<ul> <li>BufferType</li> <li>ListType</li> <li>ObjectType</li> <li>PtrType</li> <li>VariantType</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/Util/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Util/#addressoffset-arithmetic-ops","title":"Address/offset arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilalign-utilalignop","title":"<code>util.align</code> (Util::AlignOp)","text":"<p>Aligns up to a power-of-two alignment if required</p> <p>Syntax:</p> <pre><code>operation ::= `util.align` $value `,` $alignment attr-dict `:` type($result)\n</code></pre> <p>Aligns |value| up to the given power-of-two |alignment| if required.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands","title":"Operands:","text":"Operand Description <code>value</code> signless-integer-like <code>alignment</code> signless-integer-like"},{"location":"reference/mlir-dialects/Util/#results","title":"Results:","text":"Result Description <code>result</code> signless-integer-like"},{"location":"reference/mlir-dialects/Util/#utilsizeof-utilsizeofop","title":"<code>util.sizeof</code> (Util::SizeOfOp)","text":"<p>Returns the size in bytes of a datatype</p> <p>Syntax:</p> <pre><code>operation ::= `util.sizeof` $sizedType attr-dict-with-keyword\n</code></pre> <p>Most datatypes have a static size at all layers of the compilation stack. However, those that only have a size for certain lowering flows can be challenging. This op represents such sizes in a way that can be specialized later.</p> <p>Returns the size in bytes, rounded up to the next whole byte of the specified type. This op will fold to a constant index value for IntegerType and FloatType. 
All others are not folded.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sizedType</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Util/#results_1","title":"Results:","text":"Result Description <code>size</code> index"},{"location":"reference/mlir-dialects/Util/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/Util/#utilbufferalloc-utilbufferallocop","title":"<code>util.buffer.alloc</code> (Util::BufferAllocOp)","text":"<p>Allocates a buffer with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.alloc` `uninitialized`\n              attr-dict\n              `:`\n              type($result) `` `{` $storage_size `}`\n</code></pre> <p>Allocates a buffer with undefined contents. 
Consumers of the allocated result must assume nothing of the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>alignment</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_1","title":"Operands:","text":"Operand Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_2","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercompare-utilbuffercompareop","title":"<code>util.buffer.compare</code> (Util::BufferCompareOp)","text":"<p>Compares a range of two buffers</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.compare` $lhs `[` $lhs_offset `]` `,`\n              $rhs `[` $rhs_offset `]` `,`\n              $length `:`\n              type($lhs) `` `{` $lhs_size `}` `,`\n              type($rhs) `` `{` $rhs_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns true if the two ranges are bitwise equivalent, somewhat like memcmp.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_2","title":"Operands:","text":"Operand Description <code>lhs</code> a reference counted byte buffer <code>lhs_size</code> index <code>lhs_offset</code> index <code>rhs</code> a reference counted byte buffer <code>rhs_size</code> index <code>rhs_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#results_3","title":"Results:","text":"Result Description <code>result</code> 1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilbufferconstant-utilbufferconstantop","title":"<code>util.buffer.constant</code> (Util::BufferConstantOp)","text":"<p>Constant host-side byte buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n</code></pre> <p>Defines a compile-time byte buffer based on the given attribute value. 
The attribute will be serialized into the canonical IREE format for the chosen host target.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::Attributebuffer-like constant attribute values <code>alignment</code>::mlir::IntegerAttrindex attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#results_4","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercopy-utilbuffercopyop","title":"<code>util.buffer.copy</code> (Util::BufferCopyOp)","text":"<p>Copies a range of bytes between buffers</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.copy` $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Copies a range of bytes as with memcpy (no overlapping).</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_3","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer 
<code>source_size</code> index <code>source_offset</code> index <code>target</code> a reference counted byte buffer <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferdealloc-utilbufferdeallocop","title":"<code>util.buffer.dealloc</code> (Util::BufferDeallocOp)","text":"<p>Deallocates a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.dealloc` $operand `:` type($operand) `{` $operand_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Hints that the buffer contents can be discarded. Buffers are reference counted and other owners may keep it live beyond the dealloc.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_4","title":"Operands:","text":"Operand Description <code>operand</code> a reference counted byte buffer <code>operand_size</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferfill-utilbufferfillop","title":"<code>util.buffer.fill</code> (Util::BufferFillOp)","text":"<p>Fills a range of bytes with a value</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.fill` $pattern `,`\n              $target `[` $target_offset `for` $length `]` `:`\n              type($pattern) `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Fills the contents of the buffer in the given byte range with a pattern. 
The offset and length must match the natural alignment of the pattern type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_5","title":"Operands:","text":"Operand Description <code>pattern</code> integer or floating-point or index <code>target</code> a reference counted byte buffer <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferhash-utilbufferhashop","title":"<code>util.buffer.hash</code> (Util::BufferHashOp)","text":"<p>Computes the hash of a byte range of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.hash` $source `[` $source_offset `for` $length `]`\n              `:` type($source) `` `{` $source_size `}` `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Computes the SipHash-2-4 of a value at a byte offset with the given length. 
This always uses a seed of <code>0x0001020304...0e0f</code> and produces a single 64 bit value.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_6","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#results_5","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilbufferload-utilbufferloadop","title":"<code>util.buffer.load</code> (Util::BufferLoadOp)","text":"<p>Loads a value from a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.load` $source `[` $source_offset `for` $length `]`\n              `:` type($source) `` `{` $source_size `}` `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Loads a value at a byte offset. 
Must be aligned to the natural size of the result type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_7","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#results_6","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#utilbuffersize-utilbuffersizeop","title":"<code>util.buffer.size</code> (Util::BufferSizeOp)","text":"<p>Returns the total buffer storage size in bytes</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.size` $operand\n              `:` type($operand)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the total length of the buffer in bytes from its base offset.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_8","title":"Operands:","text":"Operand Description <code>operand</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#results_7","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferslice-utilbuffersliceop","title":"<code>util.buffer.slice</code> (Util::BufferSliceOp)","text":"<p>Clones a subregion of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= 
`util.buffer.slice` $source `[` $source_offset `]` attr-dict `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n</code></pre> <p>Returns a copy of the contents from the source buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>alignment</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_9","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_8","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbufferstorage-utilbufferstorageop","title":"<code>util.buffer.storage</code> (Util::BufferStorageOp)","text":"<p>Returns the underlying buffer storage range</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.storage` $operand\n              `:` type($operand) `` `{` $operand_size `}` `-&gt;` `(` type($result) `,` type($offset) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Returns the buffer storage as a memref that must be offset and restricted to the returned range. 
The memref may be of any type and the user is responsible for ensuring that the reinterpret_cast-like behavior makes sense for the data they are accessing.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_10","title":"Operands:","text":"Operand Description <code>operand</code> a reference counted byte buffer <code>operand_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_9","title":"Results:","text":"Result Description <code>result</code> memref of any type values <code>offset</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferstore-utilbufferstoreop","title":"<code>util.buffer.store</code> (Util::BufferStoreOp)","text":"<p>Stores a value into a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.store` $source `,`\n              $target `[` $target_offset `for` $length `]`\n              `:` type($source) `-&gt;` type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Stores a value at a byte offset. 
Must be aligned to the natural size of the source type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_11","title":"Operands:","text":"Operand Description <code>source</code> index or integer or floating-point <code>target</code> a reference counted byte buffer <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#utilbuffersubspan-utilbuffersubspanop","title":"<code>util.buffer.subspan</code> (Util::BufferSubspanOp)","text":"<p>Returns a reference to a subrange of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.subspan` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a logical view into an underlying source buffer. 
This induces aliasing and multiple SSA values may allow access to the same underlying buffer storage.</p> <p>Subspans are a compiler-only concept and are propagated by an analysis pass to result in absolute offsets on accesses any place the subrange would have been used.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SubrangeOperandOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code>, <code>Util_SubrangeOp</code>, <code>ViewLikeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_12","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_10","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/Util/#utiloptimization_barrier-utiloptimizationbarrierop","title":"<code>util.optimization_barrier</code> (Util::OptimizationBarrierOp)","text":"<p>Prevents compiler optimizations across a value.</p> <p>Syntax:</p> <pre><code>operation ::= `util.optimization_barrier` attr-dict\n              ($operands^ `:` type($operands))?\n</code></pre> <p>Wraps any operands in an unoptimizable identity to prevent its results from being folded. 
It will be dropped during the final step in compilation and has no effect at runtime.</p> <p>Traits: <code>SameOperandsAndResultType</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_13","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#results_11","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#utilunfoldable_constant-utilunfoldableconstantop","title":"<code>util.unfoldable_constant</code> (Util::UnfoldableConstantOp)","text":"<p>A constant that cannot be folded by the compiler.</p> <p>Similar to a std.constant, but is declared as having a side effect and has no folder. This is really just syntactic sugar as it is canonicalized to a std.constant wrapped in an util.optimization_barrier.</p>"},{"location":"reference/mlir-dialects/Util/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/Util/#results_12","title":"Results:","text":"Result Description \u00abunnamed\u00bb any type"},{"location":"reference/mlir-dialects/Util/#utilunreachable-utilunreachableop","title":"<code>util.unreachable</code> (Util::UnreachableOp)","text":"<p>Unreachable assertion op</p> <p>Syntax:</p> <pre><code>operation ::= `util.unreachable` $message attr-dict\n</code></pre> <p>Signals to the compiler that the parent block should not be reachable. 
This may be converted into a runtime assertion, though ideally they are stripped during translation.</p> <pre><code>^bb0:\n  %true = arith.constant true\n  cond_br %true, ^bb2, ^bb1\n^bb1:\n  // Indicates that this branch should never be taken.\n  util.unreachable \"shouldn't be here\"\n^bb2:\n  ...\n</code></pre> <p>Traits: <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#data-type-conversion-ops","title":"Data type conversion ops","text":""},{"location":"reference/mlir-dialects/Util/#utilnumericoptional_narrow-utilnumericoptionalnarrowop","title":"<code>util.numeric.optional_narrow</code> (Util::NumericOptionalNarrowOp)","text":"<p>Memorializes an optional numeric narrowing that is valid</p> <p>Syntax:</p> <pre><code>operation ::= `util.numeric.optional_narrow` $operand `:` type($operand) `as` $semantic_type attr-dict\n</code></pre> <p>Serves as a placeholder for points in the computation where an optional numeric narrowing can be performed without loss of information. Such ops can guide optimization passes wishing to perform precision reduction.</p> <p>In addition to the operand and result type, this op takes an additional <code>semantic_type</code> attribute representing the semantic target type which can be:</p> <ul> <li>FloatType</li> <li>Signed IntegerType</li> <li>Unsigned IntegerType</li> </ul> <p>Note that this <code>semantic_type</code> must be a sign-carrying integer if using an integer type and cannot be IndexType (i.e. it can be used to indicate a possible narrowing of an IndexType to a specific integer).</p> <p>If the operand is a TensorType, then the result must be a TensorType. 
The <code>semantic_type</code> constrains the element type.</p> <p>Optionally, the minimum and maximum integer values (for integer semantic types) are tracked if known.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>semantic_type</code>::mlir::TypeAttrany type attribute <code>min_value</code>::mlir::IntegerAttrarbitrary integer attribute <code>max_value</code>::mlir::IntegerAttrarbitrary integer attribute"},{"location":"reference/mlir-dialects/Util/#operands_14","title":"Operands:","text":"Operand Description <code>operand</code> signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#results_13","title":"Results:","text":"Result Description <code>result</code> signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/Util/#utilglobaladdress-utilglobaladdressop","title":"<code>util.global.address</code> (Util::GlobalAddressOp)","text":"<p>Returns an address reference to a global</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.address` (`immutable` $is_immutable^)?\n              $global attr-dict `:` qualified(type($result))\n</code></pre> <p>Returns the address of a global as a typed reference. 
Can be used with the global load and store indirect ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalAddressOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/Util/#results_14","title":"Results:","text":"Result Description <code>result</code> a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilgloballoadindirect-utilgloballoadindirectop","title":"<code>util.global.load.indirect</code> (Util::GlobalLoadIndirectOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.load.indirect` (`immutable` $is_immutable^)?\n              $global attr-dict `:` qualified(type($global)) `-&gt;` type($result)\n</code></pre> <p>Returns a copy of the global variable value.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/Util/#operands_15","title":"Operands:","text":"Operand Description <code>global</code> a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#results_15","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utilglobalload-utilgloballoadop","title":"<code>util.global.load</code> (Util::GlobalLoadOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> 
<pre><code>operation ::= `util.global.load` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($result)\n</code></pre> <p>Returns a global variable value.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/Util/#results_16","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utilglobal-utilglobalop","title":"<code>util.global</code> (Util::GlobalOp)","text":"<p>Stateful global variable declaration</p> <p>Syntax:</p> <pre><code>operation ::= `util.global` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Declares a global variable that maintains its value across invocations. 
The value is tied to the execution context of the module and different contexts will have different variable storage.</p> <p>Interfaces: <code>Symbol</code>, <code>Util_GlobalOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>::mlir::TypedAttrTypedAttr instance <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance"},{"location":"reference/mlir-dialects/Util/#utilglobalstoreindirect-utilglobalstoreindirectop","title":"<code>util.global.store.indirect</code> (Util::GlobalStoreIndirectOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.store.indirect` $value `,` $global attr-dict `:` type($value) `-&gt;` qualified(type($global))\n</code></pre> <p>Stores a copy of the value into a global variable.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_16","title":"Operands:","text":"Operand Description <code>value</code> any type <code>global</code> a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilglobalstore-utilglobalstoreop","title":"<code>util.global.store</code> (Util::GlobalStoreOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.store` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a copy of the value into a global variable.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, 
<code>Util_GlobalStoreOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#operands_17","title":"Operands:","text":"Operand Description <code>value</code> any type"},{"location":"reference/mlir-dialects/Util/#list-ops","title":"List ops","text":"<p>Ops for <code>!util.list&lt;T&gt;</code> (mostly just a placeholder for now).</p>"},{"location":"reference/mlir-dialects/Util/#utillistcreate-utillistcreateop","title":"<code>util.list.create</code> (Util::ListCreateOp)","text":"<p>Creates a new empty list</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.create` ($initial_capacity^)? attr-dict `:` qualified(type($result))\n</code></pre> <p>Creates a new empty list with an optional initial capacity.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_18","title":"Operands:","text":"Operand Description <code>initial_capacity</code> index"},{"location":"reference/mlir-dialects/Util/#results_17","title":"Results:","text":"Result Description <code>result</code> dense list container type"},{"location":"reference/mlir-dialects/Util/#utillistget-utillistgetop","title":"<code>util.list.get</code> (Util::ListGetOp)","text":"<p>Element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.get` $list `[` $index `]` attr-dict `:` custom&lt;ListTypeGet&gt;(type($list), type($result))\n</code></pre> <p>Returns the value of the element at the given index. 
Note that the value may be null if the element is null or the type does not match.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_19","title":"Operands:","text":"Operand Description <code>list</code> dense list container type <code>index</code> index"},{"location":"reference/mlir-dialects/Util/#results_18","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utillistresize-utillistresizeop","title":"<code>util.list.resize</code> (Util::ListResizeOp)","text":"<p>Resizes the list to a new count in elements</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.resize` operands attr-dict `:` qualified(type($list))\n</code></pre> <p>Resizes the list to contain <code>new_size</code> elements. This will either truncate the list if the existing size is greater than <code>new_size</code> or extend the list with the default list value of the element type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_20","title":"Operands:","text":"Operand Description <code>list</code> dense list container type <code>new_size</code> index"},{"location":"reference/mlir-dialects/Util/#utillistset-utillistsetop","title":"<code>util.list.set</code> (Util::ListSetOp)","text":"<p>Element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.set` $list `[` $index `]` `,` $value attr-dict `:` custom&lt;ListTypeSet&gt;(type($list), type($value))\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_21","title":"Operands:","text":"Operand Description <code>list</code> dense list container type <code>index</code> index <code>value</code> any type"},{"location":"reference/mlir-dialects/Util/#utillistsize-utillistsizeop","title":"<code>util.list.size</code> (Util::ListSizeOp)","text":"<p>The size of the list in elements</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.size` operands attr-dict `:` qualified(type($list))\n</code></pre> <p>Returns the current size of the list in elements.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_22","title":"Operands:","text":"Operand Description <code>list</code> dense list container type"},{"location":"reference/mlir-dialects/Util/#results_19","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Util/#range-arithmetic-ops","title":"Range arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilrangeextents-utilrangeextentsop","title":"<code>util.range.extents</code> (Util::RangeExtentsOp)","text":"<p>Returns the min/max of a union of a set of ranges</p> <p>Syntax:</p> <pre><code>operation ::= `util.range.extents` custom&lt;RangeList&gt;($offsets, $lengths) attr-dict `:` type($min)\n</code></pre> <p>Computes min(offsets) and max(offsets + lengths). 
Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code>, <code>SameVariadicOperandSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_23","title":"Operands:","text":"Operand Description <code>offsets</code> variadic of index or integer <code>lengths</code> variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_20","title":"Results:","text":"Result Description <code>min</code> index or integer <code>max</code> index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemax-utilrangemaxop","title":"<code>util.range.max</code> (Util::RangeMaxOp)","text":"<p>Returns the max of all values</p> <p>Syntax:</p> <pre><code>operation ::= `util.range.max` $operands attr-dict `:` type($result)\n</code></pre> <p>Computes the max of a variadic list of operands. 
Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code>, <code>SameVariadicOperandSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_24","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_21","title":"Results:","text":"Result Description <code>result</code> index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemin-utilrangeminop","title":"<code>util.range.min</code> (Util::RangeMinOp)","text":"<p>Returns the min of all values</p> <p>Syntax:</p> <pre><code>operation ::= `util.range.min` $operands attr-dict `:` type($result)\n</code></pre> <p>Computes the min of a variadic list of operands. 
Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code>, <code>SameVariadicOperandSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_25","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_22","title":"Results:","text":"Result Description <code>result</code> index or integer"},{"location":"reference/mlir-dialects/Util/#status-ops","title":"Status ops","text":""},{"location":"reference/mlir-dialects/Util/#utilstatuscheck_ok-utilstatuscheckokop","title":"<code>util.status.check_ok</code> (Util::StatusCheckOkOp)","text":"<p>Raises a global failure if a status is not 'ok'</p> <p>Syntax:</p> <pre><code>operation ::= `util.status.check_ok` $status (`,` $message^)? attr-dict\n</code></pre> <p>When the status is not 'ok' this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. 
It's also possible that other work that is in-flight or pending when the failure occurs will complete.</p>"},{"location":"reference/mlir-dialects/Util/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#operands_26","title":"Operands:","text":"Operand Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/Util/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/Util/#utilcall-utilcallop","title":"<code>util.call</code> (Util::CallOp)","text":"<p>Function call operation</p> <p>Syntax:</p> <pre><code>operation ::= `util.call` $callee `(` $operands `)`\n              attr-dict `:`\n              custom&lt;OperandTypeList&gt;(type($operands))\n              `-&gt;`\n              custom&lt;TiedFunctionResultList&gt;(ref($operands),\n              ref(type($operands)),\n              type($results),\n              $tied_operands)\n</code></pre> <p>Represents a direct call to a function that is within the same symbol scope as the call. The operands and result types of the call must match the specified function type.</p> <p>Calls support tied operands which indicate that specific results alias a specific operand. 
The operand and result types are allowed to differ if a cast is performed within the callee.</p> <p>Example:</p> <pre><code>util.func @fn(%arg0: i32, %arg1: tensor&lt;f32&gt;) -&gt; (f32, %arg1 as tensor&lt;i32&gt;)\n...\n%0 = util.call @fn(%0, %1) : (i32, tensor&lt;f32&gt;) -&gt; (f32, %1 as tensor&lt;i32&gt;)\n</code></pre> <p>Interfaces: <code>CallOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Util/#operands_27","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#results_23","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#utilfunc-utilfuncop","title":"<code>util.func</code> (Util::FuncOp)","text":"<p>Function operation containing a CFG region</p> <p>An operation declaring a callable function.</p> <p>An external function declaration (used when referring to a function declared in some other module) has no body.</p> <p>Traits: <code>AffineScope</code>, <code>AutomaticAllocationScope</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>OpAsmOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>sym_visibility</code>::mlir::StringAttrstring attribute 
<code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance"},{"location":"reference/mlir-dialects/Util/#utilinitializer-utilinitializerop","title":"<code>util.initializer</code> (Util::InitializerOp)","text":"<p>Global initialization function</p> <p>A function that is called in definition order upon module initialization. Must not load any globals that are defined or initialized after it in the module.</p> <p>Traits: <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>Util_InitializerOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Util/#utilreturn-utilreturnop","title":"<code>util.return</code> (Util::ReturnOp)","text":"<p>Return from a util.initializer or util.func</p> <p>Syntax:</p> <pre><code>operation ::= `util.return` attr-dict\n              ($operands^ `:` type($operands))?\n</code></pre> <p>Returns control from the enclosing initializer or function, along with any operands given.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>HasParent&lt;IREE::Util::InitializerOp, IREE::Util::FuncOp&gt;</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_28","title":"Operands:","text":"Operand Description 
<code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#type-manipulation-ops","title":"Type manipulation ops","text":""},{"location":"reference/mlir-dialects/Util/#utilcast-utilcastop","title":"<code>util.cast</code> (Util::CastOp)","text":"<p>Casts one util type to another ala static_cast/dynamic_cast</p> <p>Syntax:</p> <pre><code>operation ::= `util.cast` $operand attr-dict `:` type($operand) `to` type($result)\n</code></pre> <p>Performs a type cast between object types known to the util dialect.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>CastOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_29","title":"Operands:","text":"Operand Description <code>operand</code> any type"},{"location":"reference/mlir-dialects/Util/#results_24","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utilcmpeq-utilcmpeqop","title":"<code>util.cmp.eq</code> (Util::CmpEQOp)","text":"<p>Compares two values for equality</p> <p>Syntax:</p> <pre><code>operation ::= `util.cmp.eq` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands for equality. 
This is intended for comparing IREE reference types (like !util.buffer) that cannot be used with std.cmpi.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_30","title":"Operands:","text":"Operand Description <code>lhs</code> any type <code>rhs</code> any type"},{"location":"reference/mlir-dialects/Util/#results_25","title":"Results:","text":"Result Description <code>result</code> 1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilnull-utilnullop","title":"<code>util.null</code> (Util::NullOp)","text":"<p>Returns a null type value</p> <p>Syntax:</p> <pre><code>operation ::= `util.null` attr-dict `:` type($result)\n</code></pre> <p>Defines an SSA value that is lowered into dialects supporting null/undefined/optional/etc values.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#results_26","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#value-utility-ops","title":"Value utility ops","text":""},{"location":"reference/mlir-dialects/Util/#utilswitch-utilswitchop","title":"<code>util.switch</code> (Util::SwitchOp)","text":"<p>Primitive switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `util.switch` type($default_value) `from`\n              custom&lt;TypedValueList&gt;(ref(type($default_value)), $values, type($values))\n              `at` $index\n              `else` $default_value\n              attr-dict\n              `:` type($result)\n</code></pre> <p>Returns 
the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = util.switch i32 from [%c100, %c200, %c300] at %index else %c5 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_31","title":"Operands:","text":"Operand Description <code>index</code> index <code>default_value</code> index or integer or floating-point <code>values</code> variadic of index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#results_27","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#attributes_16","title":"Attributes","text":""},{"location":"reference/mlir-dialects/Util/#bytepatternattr","title":"BytePatternAttr","text":"<p>an attribute containing a filled byte pattern</p> <p>Syntax:</p> <pre><code>#util.byte_pattern&lt;\n  ::mlir::Type,   # type\n  int64_t   # pattern\n&gt;\n</code></pre> <p>A dense serializable attribute with the given byte pattern.</p>"},{"location":"reference/mlir-dialects/Util/#parameters","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code> pattern <code>int64_t</code>"},{"location":"reference/mlir-dialects/Util/#byterangeattr","title":"ByteRangeAttr","text":"<p>defines a range of bytes</p> <p>Specifies a starting offset and total length in bytes.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description offset <code>int64_t</code> length 
<code>int64_t</code>"},{"location":"reference/mlir-dialects/Util/#compositeattr","title":"CompositeAttr","text":"<p>an attribute composed of a sequence of attributes</p> <p>Models a concatenated set of serializable attributes that when combined form a single sequence of i8 elements. As each value references the uniqued storage of the composite element this attribute is cheap to construct. When the full flattened range is required it can be efficiently streamed via the SerializableAttrInterface. All values must also be serializable.</p> <p>All values are tightly packed to byte boundaries. If padding is required it can be inserted as splat elements attributes with the padding value (usually 0). Sub-byte aligned element types will have their individual components padded to byte alignment.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description totalLength <code>int64_t</code> values <code>ArrayAttr</code>"},{"location":"reference/mlir-dialects/Util/#inlinealwaysattr","title":"InlineAlwaysAttr","text":"<p>forces inlining on the associated function when possible</p> <p>Syntax: <code>#util.inline.always</code></p> <p>Skips any cost-model decisions as to whether a function should be inlined into call-sites and allows the inlining to happen. 
Any policies that prevent inlining will still be observed and inlining may fail if any are not satisfied.</p>"},{"location":"reference/mlir-dialects/Util/#inlineneverattr","title":"InlineNeverAttr","text":"<p>disables inlining on the associated function</p> <p>Syntax: <code>#util.inline.never</code></p> <p>Disables inlining of the function the attribute is associated with into any call-site.</p>"},{"location":"reference/mlir-dialects/Util/#uninitializedattr","title":"UninitializedAttr","text":"<p>an attribute specifying uninitialized storage</p> <p>Syntax:</p> <pre><code>#util.uninitialized&lt;\n  ::mlir::Type   # type\n&gt;\n</code></pre> <p>The contents of the storage backing this attribute may be uninitialized at runtime. This is a hint to implementations that if policy allows memory allocated for the storage of this attribute type is allowed to have undefined contents upon return.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code>"},{"location":"reference/mlir-dialects/Util/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/Util/#buffertype","title":"BufferType","text":"<p>a reference counted byte buffer</p> <p>Syntax: <code>!util.buffer</code></p> <p>A reference counted byte buffer that models a pointer, offset, and length.</p>"},{"location":"reference/mlir-dialects/Util/#listtype","title":"ListType","text":"<p>dense list container type</p> <p>Syntax:</p> <pre><code>!util.list&lt;\n  Type   # element_type\n&gt;\n</code></pre> <p>Typed container supporting variant storage.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description element_type <code>Type</code>"},{"location":"reference/mlir-dialects/Util/#objecttype","title":"ObjectType","text":"<p>a placeholder for an unspecified object type</p> <p>Syntax: <code>!util.object</code></p> <p>Describes a runtime object type. 
These may be reference counted or garbage collected at runtime.</p>"},{"location":"reference/mlir-dialects/Util/#ptrtype","title":"PtrType","text":"<p>a pointer-like reference</p> <p>Syntax:</p> <pre><code>!util.ptr&lt;\n  Type   # target_type\n&gt;\n</code></pre> <p>A typed indirect reference to a value. These define a runtime addressable value that is strongly referenced.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description target_type <code>Type</code>"},{"location":"reference/mlir-dialects/Util/#varianttype","title":"VariantType","text":"<p>a placeholder for a variant type (<code>?</code>)</p> <p>Syntax: <code>!util.variant</code></p> <p>Describes a runtime variant type. These may be primitives (i32, f32, etc) or object types.</p>"},{"location":"reference/mlir-dialects/VM/","title":"VM","text":""},{"location":"reference/mlir-dialects/VM/#vm-dialect","title":"'vm' Dialect","text":"<p>A dialect representing operations against an abstract virtual machine.</p> <p>The virtual machine ops are designed to be either serialized to a bytecode representation that can be interpreted at runtime or lowered further to static representations such as LLVM IR, C, etc. The idea is that the types and operations performed are generally just encoding resource ownership rules and control flow that can be represented in many different ways by target runtimes. For example, it should be possible to lower the VM dialect to SPIR-V and run the VM entirely within a persistent Vulkan kernel.</p> <p>With this scalable runtime approach we make some limiting assumptions to keep the required implementations simple. As we assume all real math is happening within dispatch regions the only math we provide is scalar operations used for offset and shape calculations. 
This also enables simple flow control such as fixed-range loops.</p> <p>Besides integer values the only other storage type is a variant reference modeling an abstract iree_vm_ref_t. This allows automated reference counting to be relied upon by other dialects built on top of the VM dialect and avoids the need for more verbose manual reference counting logic (that may be difficult or impossible to manage given the coroutine-like nature of the VM). Lowering targets can insert the reference counting as needed.</p> <p>The types in the VM dialect correspond to the storage rather than value type, with the interpretation of the type encoded on the op.</p> <ul> <li>'vm' Dialect<ul> <li>Operations<ul> <li>Async/fiber ops<ul> <li>vm.yield (VM::YieldOp)</li> </ul> </li> <li>Bitwise shift and rotate ops<ul> <li>vm.shl.i32 (VM::ShlI32Op)</li> <li>vm.shl.i64 (VM::ShlI64Op)</li> <li>vm.shr.i32.s (VM::ShrI32SOp)</li> <li>vm.shr.i32.u (VM::ShrI32UOp)</li> <li>vm.shr.i64.s (VM::ShrI64SOp)</li> <li>vm.shr.i64.u (VM::ShrI64UOp)</li> </ul> </li> <li>Buffer ops<ul> <li>vm.buffer.alloc (VM::BufferAllocOp)</li> <li>vm.buffer.clone (VM::BufferCloneOp)</li> <li>vm.buffer.compare (VM::BufferCompareOp)</li> <li>vm.buffer.copy (VM::BufferCopyOp)</li> <li>vm.buffer.fill.f32 (VM::BufferFillF32Op)</li> <li>vm.buffer.fill.f64 (VM::BufferFillF64Op)</li> <li>vm.buffer.fill.i16 (VM::BufferFillI16Op)</li> <li>vm.buffer.fill.i32 (VM::BufferFillI32Op)</li> <li>vm.buffer.fill.i64 (VM::BufferFillI64Op)</li> <li>vm.buffer.fill.i8 (VM::BufferFillI8Op)</li> <li>vm.buffer.hash (VM::BufferHashOp)</li> <li>vm.buffer.length (VM::BufferLengthOp)</li> <li>vm.buffer.load.f32 (VM::BufferLoadF32Op)</li> <li>vm.buffer.load.f64 (VM::BufferLoadF64Op)</li> <li>vm.buffer.load.i16.s (VM::BufferLoadI16SOp)</li> <li>vm.buffer.load.i16.u (VM::BufferLoadI16UOp)</li> <li>vm.buffer.load.i32 (VM::BufferLoadI32Op)</li> <li>vm.buffer.load.i64 (VM::BufferLoadI64Op)</li> <li>vm.buffer.load.i8.s (VM::BufferLoadI8SOp)</li> 
<li>vm.buffer.load.i8.u (VM::BufferLoadI8UOp)</li> <li>vm.buffer.store.f32 (VM::BufferStoreF32Op)</li> <li>vm.buffer.store.f64 (VM::BufferStoreF64Op)</li> <li>vm.buffer.store.i16 (VM::BufferStoreI16Op)</li> <li>vm.buffer.store.i32 (VM::BufferStoreI32Op)</li> <li>vm.buffer.store.i64 (VM::BufferStoreI64Op)</li> <li>vm.buffer.store.i8 (VM::BufferStoreI8Op)</li> </ul> </li> <li>Casting and conversion ops<ul> <li>vm.bitcast.f32.i32 (VM::BitcastF32I32Op)</li> <li>vm.bitcast.f64.i64 (VM::BitcastF64I64Op)</li> <li>vm.bitcast.i32.f32 (VM::BitcastI32F32Op)</li> <li>vm.bitcast.i64.f64 (VM::BitcastI64F64Op)</li> <li>vm.cast.any.ref (VM::CastAnyRefOp)</li> <li>vm.cast.f32.si32 (VM::CastF32SI32Op)</li> <li>vm.cast.f32.ui32 (VM::CastF32UI32Op)</li> <li>vm.cast.ref.any (VM::CastRefAnyOp)</li> <li>vm.cast.si32.f32 (VM::CastSI32F32Op)</li> <li>vm.cast.ui32.f32 (VM::CastUI32F32Op)</li> <li>vm.ext.f32.f64 (VM::ExtF32F64Op)</li> <li>vm.ext.i16.i32.s (VM::ExtI16I32SOp)</li> <li>vm.ext.i16.i32.u (VM::ExtI16I32UOp)</li> <li>vm.ext.i16.i64.s (VM::ExtI16I64SOp)</li> <li>vm.ext.i16.i64.u (VM::ExtI16I64UOp)</li> <li>vm.ext.i32.i64.s (VM::ExtI32I64SOp)</li> <li>vm.ext.i32.i64.u (VM::ExtI32I64UOp)</li> <li>vm.ext.i8.i32.s (VM::ExtI8I32SOp)</li> <li>vm.ext.i8.i32.u (VM::ExtI8I32UOp)</li> <li>vm.ext.i8.i64.s (VM::ExtI8I64SOp)</li> <li>vm.ext.i8.i64.u (VM::ExtI8I64UOp)</li> <li>vm.trunc.f64.f32 (VM::TruncF64F32Op)</li> <li>vm.trunc.i16.i8 (VM::TruncI16I8Op)</li> <li>vm.trunc.i32.i16 (VM::TruncI32I16Op)</li> <li>vm.trunc.i32.i8 (VM::TruncI32I8Op)</li> <li>vm.trunc.i64.i16 (VM::TruncI64I16Op)</li> <li>vm.trunc.i64.i32 (VM::TruncI64I32Op)</li> <li>vm.trunc.i64.i8 (VM::TruncI64I8Op)</li> </ul> </li> <li>Comparison ops<ul> <li>vm.cmp.eq.i32 (VM::CmpEQI32Op)</li> <li>vm.cmp.eq.i64 (VM::CmpEQI64Op)</li> <li>vm.cmp.gte.i32.s (VM::CmpGTEI32SOp)</li> <li>vm.cmp.gte.i32.u (VM::CmpGTEI32UOp)</li> <li>vm.cmp.gte.i64.s (VM::CmpGTEI64SOp)</li> <li>vm.cmp.gte.i64.u (VM::CmpGTEI64UOp)</li> <li>vm.cmp.gt.i32.s 
(VM::CmpGTI32SOp)</li> <li>vm.cmp.gt.i32.u (VM::CmpGTI32UOp)</li> <li>vm.cmp.gt.i64.s (VM::CmpGTI64SOp)</li> <li>vm.cmp.gt.i64.u (VM::CmpGTI64UOp)</li> <li>vm.cmp.lte.i32.s (VM::CmpLTEI32SOp)</li> <li>vm.cmp.lte.i32.u (VM::CmpLTEI32UOp)</li> <li>vm.cmp.lte.i64.s (VM::CmpLTEI64SOp)</li> <li>vm.cmp.lte.i64.u (VM::CmpLTEI64UOp)</li> <li>vm.cmp.lt.i32.s (VM::CmpLTI32SOp)</li> <li>vm.cmp.lt.i32.u (VM::CmpLTI32UOp)</li> <li>vm.cmp.lt.i64.s (VM::CmpLTI64SOp)</li> <li>vm.cmp.lt.i64.u (VM::CmpLTI64UOp)</li> <li>vm.cmp.ne.i32 (VM::CmpNEI32Op)</li> <li>vm.cmp.ne.i64 (VM::CmpNEI64Op)</li> <li>vm.cmp.nz.i32 (VM::CmpNZI32Op)</li> <li>vm.cmp.nz.i64 (VM::CmpNZI64Op)</li> </ul> </li> <li>Conditional assignment ops<ul> <li>vm.select.f32 (VM::SelectF32Op)</li> <li>vm.select.f64 (VM::SelectF64Op)</li> <li>vm.select.i32 (VM::SelectI32Op)</li> <li>vm.select.i64 (VM::SelectI64Op)</li> <li>vm.select.ref (VM::SelectRefOp)</li> <li>vm.switch.f32 (VM::SwitchF32Op)</li> <li>vm.switch.f64 (VM::SwitchF64Op)</li> <li>vm.switch.i32 (VM::SwitchI32Op)</li> <li>vm.switch.i64 (VM::SwitchI64Op)</li> <li>vm.switch.ref (VM::SwitchRefOp)</li> </ul> </li> <li>Constant ops<ul> <li>vm.const.f32 (VM::ConstF32Op)</li> <li>vm.const.f32.zero (VM::ConstF32ZeroOp)</li> <li>vm.const.f64 (VM::ConstF64Op)</li> <li>vm.const.f64.zero (VM::ConstF64ZeroOp)</li> <li>vm.const.i32 (VM::ConstI32Op)</li> <li>vm.const.i32.zero (VM::ConstI32ZeroOp)</li> <li>vm.const.i64 (VM::ConstI64Op)</li> <li>vm.const.i64.zero (VM::ConstI64ZeroOp)</li> <li>vm.const.ref.rodata (VM::ConstRefRodataOp)</li> <li>vm.const.ref.zero (VM::ConstRefZeroOp)</li> <li>vm.rodata.inline (VM::RodataInlineOp)</li> <li>vm.rodata (VM::RodataOp)</li> <li>vm.rodata.table.inline (VM::RodataTableInlineOp)</li> </ul> </li> <li>Control flow ops<ul> <li>vm.br (VM::BranchOp)</li> <li>vm.br_table (VM::BranchTableOp)</li> <li>vm.call (VM::CallOp)</li> <li>vm.call.variadic (VM::CallVariadicOp)</li> <li>vm.check.eq (VM::CheckEQOp)</li> <li>vm.check.ne (VM::CheckNEOp)</li> 
<li>vm.check.nz (VM::CheckNZOp)</li> <li>vm.check.nearly_eq (VM::CheckNearlyEQOp)</li> <li>vm.cond_br (VM::CondBranchOp)</li> <li>vm.cond_fail (VM::CondFailOp)</li> <li>vm.fail (VM::FailOp)</li> <li>vm.import.resolved (VM::ImportResolvedOp)</li> <li>vm.return (VM::ReturnOp)</li> </ul> </li> <li>Debugging ops<ul> <li>vm.break (VM::BreakOp)</li> <li>vm.cond_break (VM::CondBreakOp)</li> <li>vm.print (VM::PrintOp)</li> <li>vm.trace (VM::TraceOp)</li> </ul> </li> <li>Floating-point arithmetic ops<ul> <li>vm.abs.f32 (VM::AbsF32Op)</li> <li>vm.abs.f64 (VM::AbsF64Op)</li> <li>vm.add.f32 (VM::AddF32Op)</li> <li>vm.add.f64 (VM::AddF64Op)</li> <li>vm.ceil.f32 (VM::CeilF32Op)</li> <li>vm.ceil.f64 (VM::CeilF64Op)</li> <li>vm.div.f32 (VM::DivF32Op)</li> <li>vm.div.f64 (VM::DivF64Op)</li> <li>vm.fma.f32 (VM::FMAF32Op)</li> <li>vm.fma.f64 (VM::FMAF64Op)</li> <li>vm.floor.f32 (VM::FloorF32Op)</li> <li>vm.floor.f64 (VM::FloorF64Op)</li> <li>vm.max.f32 (VM::MaxF32Op)</li> <li>vm.max.f64 (VM::MaxF64Op)</li> <li>vm.min.f32 (VM::MinF32Op)</li> <li>vm.min.f64 (VM::MinF64Op)</li> <li>vm.mul.f32 (VM::MulF32Op)</li> <li>vm.mul.f64 (VM::MulF64Op)</li> <li>vm.neg.f32 (VM::NegF32Op)</li> <li>vm.neg.f64 (VM::NegF64Op)</li> <li>vm.rem.f32 (VM::RemF32Op)</li> <li>vm.rem.f64 (VM::RemF64Op)</li> <li>vm.round.f32.even (VM::RoundF32EvenOp)</li> <li>vm.round.f32 (VM::RoundF32Op)</li> <li>vm.round.f64.even (VM::RoundF64EvenOp)</li> <li>vm.round.f64 (VM::RoundF64Op)</li> <li>vm.sub.f32 (VM::SubF32Op)</li> <li>vm.sub.f64 (VM::SubF64Op)</li> </ul> </li> <li>Floating-point comparison ops<ul> <li>vm.cmp.eq.f32.near (VM::CmpEQF32NearOp)</li> <li>vm.cmp.eq.f32.o (VM::CmpEQF32OOp)</li> <li>vm.cmp.eq.f32.u (VM::CmpEQF32UOp)</li> <li>vm.cmp.eq.f64.near (VM::CmpEQF64NearOp)</li> <li>vm.cmp.eq.f64.o (VM::CmpEQF64OOp)</li> <li>vm.cmp.eq.f64.u (VM::CmpEQF64UOp)</li> <li>vm.cmp.gte.f32.o (VM::CmpGTEF32OOp)</li> <li>vm.cmp.gte.f32.u (VM::CmpGTEF32UOp)</li> <li>vm.cmp.gte.f64.o (VM::CmpGTEF64OOp)</li> 
<li>vm.cmp.gte.f64.u (VM::CmpGTEF64UOp)</li> <li>vm.cmp.gt.f32.o (VM::CmpGTF32OOp)</li> <li>vm.cmp.gt.f32.u (VM::CmpGTF32UOp)</li> <li>vm.cmp.gt.f64.o (VM::CmpGTF64OOp)</li> <li>vm.cmp.gt.f64.u (VM::CmpGTF64UOp)</li> <li>vm.cmp.lte.f32.o (VM::CmpLTEF32OOp)</li> <li>vm.cmp.lte.f32.u (VM::CmpLTEF32UOp)</li> <li>vm.cmp.lte.f64.o (VM::CmpLTEF64OOp)</li> <li>vm.cmp.lte.f64.u (VM::CmpLTEF64UOp)</li> <li>vm.cmp.lt.f32.o (VM::CmpLTF32OOp)</li> <li>vm.cmp.lt.f32.u (VM::CmpLTF32UOp)</li> <li>vm.cmp.lt.f64.o (VM::CmpLTF64OOp)</li> <li>vm.cmp.lt.f64.u (VM::CmpLTF64UOp)</li> <li>vm.cmp.ne.f32.o (VM::CmpNEF32OOp)</li> <li>vm.cmp.ne.f32.u (VM::CmpNEF32UOp)</li> <li>vm.cmp.ne.f64.o (VM::CmpNEF64OOp)</li> <li>vm.cmp.ne.f64.u (VM::CmpNEF64UOp)</li> <li>vm.cmp.nz.f32.o (VM::CmpNZF32OOp)</li> <li>vm.cmp.nz.f32.u (VM::CmpNZF32UOp)</li> <li>vm.cmp.nz.f64.o (VM::CmpNZF64OOp)</li> <li>vm.cmp.nz.f64.u (VM::CmpNZF64UOp)</li> <li>vm.cmp.nan.f32 (VM::CmpNaNF32Op)</li> <li>vm.cmp.nan.f64 (VM::CmpNaNF64Op)</li> </ul> </li> <li>Floating-point math ops<ul> <li>vm.atan2.f32 (VM::Atan2F32Op)</li> <li>vm.atan2.f64 (VM::Atan2F64Op)</li> <li>vm.atan.f32 (VM::AtanF32Op)</li> <li>vm.atan.f64 (VM::AtanF64Op)</li> <li>vm.cos.f32 (VM::CosF32Op)</li> <li>vm.cos.f64 (VM::CosF64Op)</li> <li>vm.erf.f32 (VM::ErfF32Op)</li> <li>vm.erf.f64 (VM::ErfF64Op)</li> <li>vm.exp2.f32 (VM::Exp2F32Op)</li> <li>vm.exp2.f64 (VM::Exp2F64Op)</li> <li>vm.exp.f32 (VM::ExpF32Op)</li> <li>vm.exp.f64 (VM::ExpF64Op)</li> <li>vm.expm1.f32 (VM::ExpM1F32Op)</li> <li>vm.expm1.f64 (VM::ExpM1F64Op)</li> <li>vm.log10.f32 (VM::Log10F32Op)</li> <li>vm.log10.f64 (VM::Log10F64Op)</li> <li>vm.log1p.f32 (VM::Log1pF32Op)</li> <li>vm.log1p.f64 (VM::Log1pF64Op)</li> <li>vm.log2.f32 (VM::Log2F32Op)</li> <li>vm.log2.f64 (VM::Log2F64Op)</li> <li>vm.log.f32 (VM::LogF32Op)</li> <li>vm.log.f64 (VM::LogF64Op)</li> <li>vm.pow.f32 (VM::PowF32Op)</li> <li>vm.pow.f64 (VM::PowF64Op)</li> <li>vm.rsqrt.f32 (VM::RsqrtF32Op)</li> <li>vm.rsqrt.f64 
(VM::RsqrtF64Op)</li> <li>vm.sin.f32 (VM::SinF32Op)</li> <li>vm.sin.f64 (VM::SinF64Op)</li> <li>vm.sqrt.f32 (VM::SqrtF32Op)</li> <li>vm.sqrt.f64 (VM::SqrtF64Op)</li> <li>vm.tanh.f32 (VM::TanhF32Op)</li> <li>vm.tanh.f64 (VM::TanhF64Op)</li> </ul> </li> <li>Global ops<ul> <li>vm.global.address (VM::GlobalAddressOp)</li> <li>vm.global.f32 (VM::GlobalF32Op)</li> <li>vm.global.f64 (VM::GlobalF64Op)</li> <li>vm.global.i32 (VM::GlobalI32Op)</li> <li>vm.global.i64 (VM::GlobalI64Op)</li> <li>vm.global.load.f32 (VM::GlobalLoadF32Op)</li> <li>vm.global.load.f64 (VM::GlobalLoadF64Op)</li> <li>vm.global.load.i32 (VM::GlobalLoadI32Op)</li> <li>vm.global.load.i64 (VM::GlobalLoadI64Op)</li> <li>vm.global.load.indirect.f32 (VM::GlobalLoadIndirectF32Op)</li> <li>vm.global.load.indirect.f64 (VM::GlobalLoadIndirectF64Op)</li> <li>vm.global.load.indirect.i32 (VM::GlobalLoadIndirectI32Op)</li> <li>vm.global.load.indirect.i64 (VM::GlobalLoadIndirectI64Op)</li> <li>vm.global.load.indirect.ref (VM::GlobalLoadIndirectRefOp)</li> <li>vm.global.load.ref (VM::GlobalLoadRefOp)</li> <li>vm.global.ref (VM::GlobalRefOp)</li> <li>vm.global.store.f32 (VM::GlobalStoreF32Op)</li> <li>vm.global.store.f64 (VM::GlobalStoreF64Op)</li> <li>vm.global.store.i32 (VM::GlobalStoreI32Op)</li> <li>vm.global.store.i64 (VM::GlobalStoreI64Op)</li> <li>vm.global.store.indirect.f32 (VM::GlobalStoreIndirectF32Op)</li> <li>vm.global.store.indirect.f64 (VM::GlobalStoreIndirectF64Op)</li> <li>vm.global.store.indirect.i32 (VM::GlobalStoreIndirectI32Op)</li> <li>vm.global.store.indirect.i64 (VM::GlobalStoreIndirectI64Op)</li> <li>vm.global.store.indirect.ref (VM::GlobalStoreIndirectRefOp)</li> <li>vm.global.store.ref (VM::GlobalStoreRefOp)</li> </ul> </li> <li>Integer arithmetic ops<ul> <li>vm.abs.i32 (VM::AbsI32Op)</li> <li>vm.abs.i64 (VM::AbsI64Op)</li> <li>vm.add.i32 (VM::AddI32Op)</li> <li>vm.add.i64 (VM::AddI64Op)</li> <li>vm.div.i32.s (VM::DivI32SOp)</li> <li>vm.div.i32.u (VM::DivI32UOp)</li> <li>vm.div.i64.s 
(VM::DivI64SOp)</li> <li>vm.div.i64.u (VM::DivI64UOp)</li> <li>vm.fma.i32 (VM::FMAI32Op)</li> <li>vm.fma.i64 (VM::FMAI64Op)</li> <li>vm.max.i32.s (VM::MaxI32SOp)</li> <li>vm.max.i32.u (VM::MaxI32UOp)</li> <li>vm.max.i64.s (VM::MaxI64SOp)</li> <li>vm.max.i64.u (VM::MaxI64UOp)</li> <li>vm.min.i32.s (VM::MinI32SOp)</li> <li>vm.min.i32.u (VM::MinI32UOp)</li> <li>vm.min.i64.s (VM::MinI64SOp)</li> <li>vm.min.i64.u (VM::MinI64UOp)</li> <li>vm.mul.i32 (VM::MulI32Op)</li> <li>vm.mul.i64 (VM::MulI64Op)</li> <li>vm.rem.i32.s (VM::RemI32SOp)</li> <li>vm.rem.i32.u (VM::RemI32UOp)</li> <li>vm.rem.i64.s (VM::RemI64SOp)</li> <li>vm.rem.i64.u (VM::RemI64UOp)</li> <li>vm.sub.i32 (VM::SubI32Op)</li> <li>vm.sub.i64 (VM::SubI64Op)</li> </ul> </li> <li>Integer bit manipulation ops<ul> <li>vm.and.i32 (VM::AndI32Op)</li> <li>vm.and.i64 (VM::AndI64Op)</li> <li>vm.ctlz.i32 (VM::CtlzI32Op)</li> <li>vm.ctlz.i64 (VM::CtlzI64Op)</li> <li>vm.not.i32 (VM::NotI32Op)</li> <li>vm.not.i64 (VM::NotI64Op)</li> <li>vm.or.i32 (VM::OrI32Op)</li> <li>vm.or.i64 (VM::OrI64Op)</li> <li>vm.xor.i32 (VM::XorI32Op)</li> <li>vm.xor.i64 (VM::XorI64Op)</li> </ul> </li> <li>List ops<ul> <li>vm.list.alloc (VM::ListAllocOp)</li> <li>vm.list.get.f32 (VM::ListGetF32Op)</li> <li>vm.list.get.f64 (VM::ListGetF64Op)</li> <li>vm.list.get.i32 (VM::ListGetI32Op)</li> <li>vm.list.get.i64 (VM::ListGetI64Op)</li> <li>vm.list.get.ref (VM::ListGetRefOp)</li> <li>vm.list.reserve (VM::ListReserveOp)</li> <li>vm.list.resize (VM::ListResizeOp)</li> <li>vm.list.set.f32 (VM::ListSetF32Op)</li> <li>vm.list.set.f64 (VM::ListSetF64Op)</li> <li>vm.list.set.i32 (VM::ListSetI32Op)</li> <li>vm.list.set.i64 (VM::ListSetI64Op)</li> <li>vm.list.set.ref (VM::ListSetRefOp)</li> <li>vm.list.size (VM::ListSizeOp)</li> </ul> </li> <li>Ref comparison ops<ul> <li>vm.cmp.eq.ref (VM::CmpEQRefOp)</li> <li>vm.cmp.ne.ref (VM::CmpNERefOp)</li> <li>vm.cmp.nz.ref (VM::CmpNZRefOp)</li> </ul> </li> <li>Structural ops<ul> <li>vm.export (VM::ExportOp)</li> 
<li>vm.func (VM::FuncOp)</li> <li>vm.import (VM::ImportOp)</li> <li>vm.initializer (VM::InitializerOp)</li> <li>vm.module (VM::ModuleOp)</li> <li>vm.module_terminator (VM::ModuleTerminatorOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>OrdinalCountsAttr</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/VM/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/VM/#asyncfiber-ops","title":"Async/fiber ops","text":""},{"location":"reference/mlir-dialects/VM/#vmyield-vmyieldop","title":"<code>vm.yield</code> (VM::YieldOp)","text":"<p>Unconditional fiber yield operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.yield` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n</code></pre> <p>Yields the fiber for some (likely short) amount of time. This can be used to  perform cooperative scheduling and ensure fair (enough) execution. Execution  resumes at the specified target branch.</p> <p><code>^bb0:    vm.yield ^on_resume  ^on_resume:    ...</code></p> <p>Traits: <code>HasParent&lt;IREE::VM::FuncOp&gt;</code>, <code>Terminator</code>, <code>Util_YieldPoint</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands","title":"Operands:","text":"Operand Description <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#bitwise-shift-and-rotate-ops","title":"Bitwise shift and rotate ops","text":""},{"location":"reference/mlir-dialects/VM/#vmshli32-vmshli32op","title":"<code>vm.shl.i32</code> (VM::ShlI32Op)","text":"<p>Integer shift left operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shl.i32` 
$operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_1","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshli64-vmshli64op","title":"<code>vm.shl.i64</code> (VM::ShlI64Op)","text":"<p>Integer shift left operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shl.i64` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_2","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_1","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32s-vmshri32sop","title":"<code>vm.shr.i32.s</code> (VM::ShrI32SOp)","text":"<p>Signed integer (arithmetic) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i32.s` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_3","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_2","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32u-vmshri32uop","title":"<code>vm.shr.i32.u</code> (VM::ShrI32UOp)","text":"<p>Unsigned integer (logical) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i32.u` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_4","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_3","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64s-vmshri64sop","title":"<code>vm.shr.i64.s</code> (VM::ShrI64SOp)","text":"<p>Signed integer (arithmetic) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i64.s` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_5","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_4","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64u-vmshri64uop","title":"<code>vm.shr.i64.u</code> (VM::ShrI64UOp)","text":"<p>Unsigned integer (logical) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i64.u` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_6","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_5","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbufferalloc-vmbufferallocop","title":"<code>vm.buffer.alloc</code> (VM::BufferAllocOp)","text":"<p>Allocates a new zero-initialized buffer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.alloc` operands attr-dict `:` type($result)\n</code></pre> <p>Allocates a new zero-initialized buffer with the given size in bytes.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_7","title":"Operands:","text":"Operand Description <code>length</code> 64-bit signless integer <code>alignment</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_6","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmbufferclone-vmbuffercloneop","title":"<code>vm.buffer.clone</code> (VM::BufferCloneOp)","text":"<p>Clones a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.clone` operands attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Clones a range of the source buffer to produce a mutable buffer with the same contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_8","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>alignment</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_7","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmbuffercompare-vmbuffercompareop","title":"<code>vm.buffer.compare</code> (VM::BufferCompareOp)","text":"<p>Compares a range of a buffer to another</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.compare` operands attr-dict `:` type($lhs_buffer) `,` type($rhs_buffer)\n</code></pre> <p>Returns 1 if the two ranges 
are bitwise equivalent, somewhat like memcmp.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_9","title":"Operands:","text":"Operand Description <code>lhs_buffer</code> ref <code>lhs_offset</code> 64-bit signless integer <code>rhs_buffer</code> ref <code>rhs_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_8","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbuffercopy-vmbuffercopyop","title":"<code>vm.buffer.copy</code> (VM::BufferCopyOp)","text":"<p>Copies a range of a buffer to another</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.copy` operands attr-dict `:` type($source_buffer) `-&gt;` type($target_buffer)\n</code></pre> <p>Copies a range of one buffer to another, like memcpy.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_10","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf32-vmbufferfillf32op","title":"<code>vm.buffer.fill.f32</code> (VM::BufferFillF32Op)","text":"<p>Fills the buffer with 
the given repeating 32-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.f32` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_11","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf64-vmbufferfillf64op","title":"<code>vm.buffer.fill.f64</code> (VM::BufferFillF64Op)","text":"<p>Fills the buffer with the given repeating 64-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.f64` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_12","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 64-bit float or 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli16-vmbufferfilli16op","title":"<code>vm.buffer.fill.i16</code> (VM::BufferFillI16Op)","text":"<p>Fills the buffer with the given repeating 16-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i16` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_13","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli32-vmbufferfilli32op","title":"<code>vm.buffer.fill.i32</code> (VM::BufferFillI32Op)","text":"<p>Fills the buffer with the given repeating 32-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i32` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_14","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer 
<code>length</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli64-vmbufferfilli64op","title":"<code>vm.buffer.fill.i64</code> (VM::BufferFillI64Op)","text":"<p>Fills the buffer with the given repeating 64-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i64` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_15","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli8-vmbufferfilli8op","title":"<code>vm.buffer.fill.i8</code> (VM::BufferFillI8Op)","text":"<p>Fills the buffer with the given repeating 8-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i8` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_16","title":"Operands:","text":"Operand 
Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 8-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferhash-vmbufferhashop","title":"<code>vm.buffer.hash</code> (VM::BufferHashOp)","text":"<p>Syntax:</p> <pre><code>operation ::= `vm.buffer.hash` $source_buffer `,` $source_offset `,` $length\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Computes the SipHash-2-4 of the source buffer at the given offset for |length| bytes using seed <code>0x0001020304...0e0f</code>.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_17","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_9","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferlength-vmbufferlengthop","title":"<code>vm.buffer.length</code> (VM::BufferLengthOp)","text":"<p>Returns the byte length of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.length` operands attr-dict `:` type($buffer) `-&gt;` type($result)\n</code></pre> <p>Returns the total byte length of the given buffer. 
This is the exact value as specified during buffer allocation though the underlying system buffer may have additional padding.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_18","title":"Operands:","text":"Operand Description <code>buffer</code> ref"},{"location":"reference/mlir-dialects/VM/#results_10","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf32-vmbufferloadf32op","title":"<code>vm.buffer.load.f32</code> (VM::BufferLoadF32Op)","text":"<p>32-bit floating-point load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.f32` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_19","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_11","title":"Results:","text":"Result Description <code>result</code> 32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf64-vmbufferloadf64op","title":"<code>vm.buffer.load.f64</code> (VM::BufferLoadF64Op)","text":"<p>64-bit floating-point load</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.buffer.load.f64` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_20","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_12","title":"Results:","text":"Result Description <code>result</code> 64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16s-vmbufferloadi16sop","title":"<code>vm.buffer.load.i16.s</code> (VM::BufferLoadI16SOp)","text":"<p>Signed 16-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i16.s` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_21","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_13","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16u-vmbufferloadi16uop","title":"<code>vm.buffer.load.i16.u</code> (VM::BufferLoadI16UOp)","text":"<p>Unsigned 16-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i16.u` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_22","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_14","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi32-vmbufferloadi32op","title":"<code>vm.buffer.load.i32</code> (VM::BufferLoadI32Op)","text":"<p>32-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i32` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_23","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#results_15","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi64-vmbufferloadi64op","title":"<code>vm.buffer.load.i64</code> (VM::BufferLoadI64Op)","text":"<p>64-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i64` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_24","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_16","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8s-vmbufferloadi8sop","title":"<code>vm.buffer.load.i8.s</code> (VM::BufferLoadI8SOp)","text":"<p>Signed 8-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i8.s` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on 
::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_25","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_17","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8u-vmbufferloadi8uop","title":"<code>vm.buffer.load.i8.u</code> (VM::BufferLoadI8UOp)","text":"<p>Unsigned 8-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i8.u` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_26","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_18","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref32-vmbufferstoref32op","title":"<code>vm.buffer.store.f32</code> (VM::BufferStoreF32Op)","text":"<p>32-bit floating-point store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.f32` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface 
(MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_27","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref64-vmbufferstoref64op","title":"<code>vm.buffer.store.f64</code> (VM::BufferStoreF64Op)","text":"<p>64-bit floating-point store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.f64` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_28","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei16-vmbufferstorei16op","title":"<code>vm.buffer.store.i16</code> (VM::BufferStoreI16Op)","text":"<p>Unsigned 16-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i16` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_29","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei32-vmbufferstorei32op","title":"<code>vm.buffer.store.i32</code> (VM::BufferStoreI32Op)","text":"<p>32-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i32` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_30","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei64-vmbufferstorei64op","title":"<code>vm.buffer.store.i64</code> (VM::BufferStoreI64Op)","text":"<p>64-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i64` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_31","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei8-vmbufferstorei8op","title":"<code>vm.buffer.store.i8</code> (VM::BufferStoreI8Op)","text":"<p>Unsigned 8-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i8` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_32","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#casting-and-conversion-ops","title":"Casting and conversion ops","text":"<p>Casting and type conversion/emulation.</p>"},{"location":"reference/mlir-dialects/VM/#vmbitcastf32i32-vmbitcastf32i32op","title":"<code>vm.bitcast.f32.i32</code> (VM::BitcastF32I32Op)","text":"<p>Bitcast from a 32-bit float-point value to a 32-bit integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.f32.i32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect 
(MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_33","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_19","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcastf64i64-vmbitcastf64i64op","title":"<code>vm.bitcast.f64.i64</code> (VM::BitcastF64I64Op)","text":"<p>Bitcast from a 64-bit float-point value to a 64-bit integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.f64.i64` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_34","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_20","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcasti32f32-vmbitcasti32f32op","title":"<code>vm.bitcast.i32.f32</code> (VM::BitcastI32F32Op)","text":"<p>Bitcast from a 32-bit integer to a 32-bit float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.i32.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_35","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_21","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmbitcasti64f64-vmbitcasti64f64op","title":"<code>vm.bitcast.i64.f64</code> (VM::BitcastI64F64Op)","text":"<p>Bitcast from a 64-bit integer to a 64-bit float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.i64.f64` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_36","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_22","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastanyref-vmcastanyrefop","title":"<code>vm.cast.any.ref</code> (VM::CastAnyRefOp)","text":"<p>Casts from any ref to a specific ref type</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.any.ref` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Performs a runtime cast of an opaque <code>!vm.ref&lt;?&gt;</code> to a specific <code>!vm.ref&lt;T&gt;</code> and raises an error if the operand does not match the expected type. 
Null refs can always be cast between types.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_37","title":"Operands:","text":"Operand Description <code>operand</code> ref"},{"location":"reference/mlir-dialects/VM/#results_23","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmcastf32si32-vmcastf32si32op","title":"<code>vm.cast.f32.si32</code> (VM::CastF32SI32Op)","text":"<p>Cast from a float-point value to a signed integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.f32.si32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_38","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_24","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastf32ui32-vmcastf32ui32op","title":"<code>vm.cast.f32.ui32</code> (VM::CastF32UI32Op)","text":"<p>Cast from an float-point value to an unsigned integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.f32.ui32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect 
(MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_39","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_25","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastrefany-vmcastrefanyop","title":"<code>vm.cast.ref.any</code> (VM::CastRefAnyOp)","text":"<p>Casts from a specific ref to any ref type</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.ref.any` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Performs a compile-time widening cast of a specific <code>!vm.ref&lt;T&gt;</code> to an opaque <code>!vm.ref&lt;?&gt;</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_AssignmentOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_40","title":"Operands:","text":"Operand Description <code>operand</code> ref"},{"location":"reference/mlir-dialects/VM/#results_26","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmcastsi32f32-vmcastsi32f32op","title":"<code>vm.cast.si32.f32</code> (VM::CastSI32F32Op)","text":"<p>Cast from a signed integer to a float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.si32.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_41","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_27","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastui32f32-vmcastui32f32op","title":"<code>vm.cast.ui32.f32</code> (VM::CastUI32F32Op)","text":"<p>Cast from an unsigned integer to a float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.ui32.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_42","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_28","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmextf32f64-vmextf32f64op","title":"<code>vm.ext.f32.f64</code> (VM::ExtF32F64Op)","text":"<p>Floating-point zero extend 32 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.f32.f64` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_43","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_29","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexti16i32s-vmexti16i32sop","title":"<code>vm.ext.i16.i32.s</code> (VM::ExtI16I32SOp)","text":"<p>Integer sign extend 16 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i32.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_44","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_30","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i32u-vmexti16i32uop","title":"<code>vm.ext.i16.i32.u</code> (VM::ExtI16I32UOp)","text":"<p>Integer zero extend 16 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i32.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_45","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#results_31","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64s-vmexti16i64sop","title":"<code>vm.ext.i16.i64.s</code> (VM::ExtI16I64SOp)","text":"<p>Integer sign extend 16 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i64.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_46","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_32","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64u-vmexti16i64uop","title":"<code>vm.ext.i16.i64.u</code> (VM::ExtI16I64UOp)","text":"<p>Integer zero extend 16 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i64.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_47","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_33","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64s-vmexti32i64sop","title":"<code>vm.ext.i32.i64.s</code> 
(VM::ExtI32I64SOp)","text":"<p>Integer sign extend 32 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i32.i64.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_48","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_34","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64u-vmexti32i64uop","title":"<code>vm.ext.i32.i64.u</code> (VM::ExtI32I64UOp)","text":"<p>Integer zero extend 32 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i32.i64.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_49","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_35","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32s-vmexti8i32sop","title":"<code>vm.ext.i8.i32.s</code> (VM::ExtI8I32SOp)","text":"<p>Integer sign extend 8 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i32.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> 
<p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_50","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_36","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32u-vmexti8i32uop","title":"<code>vm.ext.i8.i32.u</code> (VM::ExtI8I32UOp)","text":"<p>Integer zero extend 8 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i32.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_51","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_37","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64s-vmexti8i64sop","title":"<code>vm.ext.i8.i64.s</code> (VM::ExtI8I64SOp)","text":"<p>Integer sign extend 8 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i64.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_52","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_38","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64u-vmexti8i64uop","title":"<code>vm.ext.i8.i64.u</code> (VM::ExtI8I64UOp)","text":"<p>Integer zero extend 8 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i64.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_53","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_39","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtruncf64f32-vmtruncf64f32op","title":"<code>vm.trunc.f64.f32</code> (VM::TruncF64F32Op)","text":"<p>Floating-point truncate to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.f64.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_54","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_40","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtrunci16i8-vmtrunci16i8op","title":"<code>vm.trunc.i16.i8</code> (VM::TruncI16I8Op)","text":"<p>Integer truncate to 8 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i16.i8` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_55","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_41","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i16-vmtrunci32i16op","title":"<code>vm.trunc.i32.i16</code> (VM::TruncI32I16Op)","text":"<p>Integer truncate to 16 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i32.i16` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_56","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_42","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i8-vmtrunci32i8op","title":"<code>vm.trunc.i32.i8</code> 
(VM::TruncI32I8Op)","text":"<p>Integer truncate to 8 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i32.i8` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_57","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_43","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i16-vmtrunci64i16op","title":"<code>vm.trunc.i64.i16</code> (VM::TruncI64I16Op)","text":"<p>Integer truncate to 16 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i64.i16` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_58","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_44","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i32-vmtrunci64i32op","title":"<code>vm.trunc.i64.i32</code> (VM::TruncI64I32Op)","text":"<p>Integer truncate to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i64.i32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> 
<p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_59","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_45","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i8-vmtrunci64i8op","title":"<code>vm.trunc.i64.i8</code> (VM::TruncI64I8Op)","text":"<p>Integer truncate to 8 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i64.i8` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_60","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_46","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#comparison-ops","title":"Comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqi32-vmcmpeqi32op","title":"<code>vm.cmp.eq.i32</code> (VM::CmpEQI32Op)","text":"<p>Integer equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.i32` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_61","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_47","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqi64-vmcmpeqi64op","title":"<code>vm.cmp.eq.i64</code> (VM::CmpEQI64Op)","text":"<p>Integer equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.i64` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_62","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_48","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32s-vmcmpgtei32sop","title":"<code>vm.cmp.gte.i32.s</code> (VM::CmpGTEI32SOp)","text":"<p>Signed integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_63","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_49","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32u-vmcmpgtei32uop","title":"<code>vm.cmp.gte.i32.u</code> (VM::CmpGTEI32UOp)","text":"<p>Unsigned integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_64","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_50","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64s-vmcmpgtei64sop","title":"<code>vm.cmp.gte.i64.s</code> (VM::CmpGTEI64SOp)","text":"<p>Signed integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: 
<code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_65","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_51","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64u-vmcmpgtei64uop","title":"<code>vm.cmp.gte.i64.u</code> (VM::CmpGTEI64UOp)","text":"<p>Unsigned integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_66","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_52","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32s-vmcmpgti32sop","title":"<code>vm.cmp.gt.i32.s</code> (VM::CmpGTI32SOp)","text":"<p>Signed integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> 
<p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_67","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_53","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32u-vmcmpgti32uop","title":"<code>vm.cmp.gt.i32.u</code> (VM::CmpGTI32UOp)","text":"<p>Unsigned integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_68","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_54","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64s-vmcmpgti64sop","title":"<code>vm.cmp.gt.i64.s</code> (VM::CmpGTI64SOp)","text":"<p>Signed integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> 
<p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_69","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_55","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64u-vmcmpgti64uop","title":"<code>vm.cmp.gt.i64.u</code> (VM::CmpGTI64UOp)","text":"<p>Unsigned integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_70","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_56","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32s-vmcmpltei32sop","title":"<code>vm.cmp.lte.i32.s</code> (VM::CmpLTEI32SOp)","text":"<p>Signed integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, 
<code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_71","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_57","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32u-vmcmpltei32uop","title":"<code>vm.cmp.lte.i32.u</code> (VM::CmpLTEI32UOp)","text":"<p>Unsigned integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_72","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_58","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64s-vmcmpltei64sop","title":"<code>vm.cmp.lte.i64.s</code> (VM::CmpLTEI64SOp)","text":"<p>Signed integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_73","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_59","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64u-vmcmpltei64uop","title":"<code>vm.cmp.lte.i64.u</code> (VM::CmpLTEI64UOp)","text":"<p>Unsigned integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_74","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_60","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32s-vmcmplti32sop","title":"<code>vm.cmp.lt.i32.s</code> (VM::CmpLTI32SOp)","text":"<p>Signed integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> 
<p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_75","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_61","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32u-vmcmplti32uop","title":"<code>vm.cmp.lt.i32.u</code> (VM::CmpLTI32UOp)","text":"<p>Unsigned integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_76","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_62","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64s-vmcmplti64sop","title":"<code>vm.cmp.lt.i64.s</code> (VM::CmpLTI64SOp)","text":"<p>Signed integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> 
<p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_77","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_63","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64u-vmcmplti64uop","title":"<code>vm.cmp.lt.i64.u</code> (VM::CmpLTI64UOp)","text":"<p>Unsigned integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_78","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_64","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei32-vmcmpnei32op","title":"<code>vm.cmp.ne.i32</code> (VM::CmpNEI32Op)","text":"<p>Integer inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.i32` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_79","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_65","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei64-vmcmpnei64op","title":"<code>vm.cmp.ne.i64</code> (VM::CmpNEI64Op)","text":"<p>Integer inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.i64` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_80","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_66","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi32-vmcmpnzi32op","title":"<code>vm.cmp.nz.i32</code> (VM::CmpNZI32Op)","text":"<p>Integer non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.i32` $operand attr-dict `:` type($operand)\n</code></pre> <p>Compares the given integer operand for a 
non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_81","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_67","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi64-vmcmpnzi64op","title":"<code>vm.cmp.nz.i64</code> (VM::CmpNZI64Op)","text":"<p>Integer non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.i64` $operand attr-dict `:` type($operand)\n</code></pre> <p>Compares the given integer operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_82","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_68","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#conditional-assignment-ops","title":"Conditional assignment ops","text":""},{"location":"reference/mlir-dialects/VM/#vmselectf32-vmselectf32op","title":"<code>vm.select.f32</code> (VM::SelectF32Op)","text":"<p>Floating-point select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value 
based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_83","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 32-bit float <code>false_value</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_69","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmselectf64-vmselectf64op","title":"<code>vm.select.f64</code> (VM::SelectF64Op)","text":"<p>Floating-point select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. 
For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_84","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 64-bit float <code>false_value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_70","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmselecti32-vmselecti32op","title":"<code>vm.select.i32</code> (VM::SelectI32Op)","text":"<p>Integer select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. 
For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_85","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 32-bit signless integer <code>false_value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_71","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselecti64-vmselecti64op","title":"<code>vm.select.i64</code> (VM::SelectI64Op)","text":"<p>Integer select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. 
For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_86","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 64-bit signless integer <code>false_value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_72","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselectref-vmselectrefop","title":"<code>vm.select.ref</code> (VM::SelectRefOp)","text":"<p>Ref select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.ref` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. 
If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_87","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> ref <code>false_value</code> ref"},{"location":"reference/mlir-dialects/VM/#results_73","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmswitchf32-vmswitchf32op","title":"<code>vm.switch.f32</code> (VM::SwitchF32Op)","text":"<p>Floating-point switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.f32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.f32 %index[%c100, %c200, %c300] else %c5 : f32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_88","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> 32-bit float <code>values</code> variadic of 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_74","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchf64-vmswitchf64op","title":"<code>vm.switch.f64</code> (VM::SwitchF64Op)","text":"<p>Floating-point switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.f64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.f64 %index[%c100, %c200, %c300] else %c5 : f64\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_89","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> 64-bit float <code>values</code> variadic of 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_75","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchi32-vmswitchi32op","title":"<code>vm.switch.i32</code> (VM::SwitchI32Op)","text":"<p>Integer switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.i32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of 
%c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.i32 %index[%c100, %c200, %c300] else %c5 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_90","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> 32-bit signless integer <code>values</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_76","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchi64-vmswitchi64op","title":"<code>vm.switch.i64</code> (VM::SwitchI64Op)","text":"<p>Integer switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.i64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.i64 %index[%c100, %c200, %c300] else %c5 : i64\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_91","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer 
<code>default_value</code> 64-bit signless integer <code>values</code> variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_77","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchref-vmswitchrefop","title":"<code>vm.switch.ref</code> (VM::SwitchRefOp)","text":"<p>Ref switch operation</p> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %arg0 to cases of %r0/%r1/%r2 if arg0==0, ==1, ==2.\n// If %arg0 is out of range (&lt;0 or &gt;2) then default to %null.\n%0 = vm.switch.ref %index[%r0, %r1, %r2] else %null : vm.ref&lt;!foo&gt;\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_92","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> ref <code>values</code> variadic of ref"},{"location":"reference/mlir-dialects/VM/#results_78","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#constant-ops","title":"Constant ops","text":""},{"location":"reference/mlir-dialects/VM/#vmconstf32-vmconstf32op","title":"<code>vm.const.f32</code> (VM::ConstF32Op)","text":"<p>32-bit floating-point constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f32` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>FloatAttr32-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_79","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf32zero-vmconstf32zeroop","title":"<code>vm.const.f32.zero</code> (VM::ConstF32ZeroOp)","text":"<p>32-bit floating-point constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f32.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_80","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64-vmconstf64op","title":"<code>vm.const.f64</code> (VM::ConstF64Op)","text":"<p>64-bit floating-point constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f64` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>FloatAttr64-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_81","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64zero-vmconstf64zeroop","title":"<code>vm.const.f64.zero</code> (VM::ConstF64ZeroOp)","text":"<p>64-bit floating-point constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f64.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_82","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconsti32-vmconsti32op","title":"<code>vm.const.i32</code> (VM::ConstI32Op)","text":"<p>32-bit integer constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i32` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription 
<code>value</code>IntegerAttr32-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_83","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti32zero-vmconsti32zeroop","title":"<code>vm.const.i32.zero</code> (VM::ConstI32ZeroOp)","text":"<p>32-bit integer constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i32.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_84","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64-vmconsti64op","title":"<code>vm.const.i64</code> (VM::ConstI64Op)","text":"<p>64-bit integer constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i64` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>IntegerAttr64-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_85","title":"Results:","text":"Result Description <code>result</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64zero-vmconsti64zeroop","title":"<code>vm.const.i64.zero</code> (VM::ConstI64ZeroOp)","text":"<p>64-bit integer constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i64.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_86","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconstrefrodata-vmconstrefrodataop","title":"<code>vm.const.ref.rodata</code> (VM::ConstRefRodataOp)","text":"<p>Constant rodata access operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.ref.rodata` $rodata attr-dict `:` type($value)\n</code></pre> <p>Returns a reference to a read-only buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>rodata</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_87","title":"Results:","text":"Result Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmconstrefzero-vmconstrefzeroop","title":"<code>vm.const.ref.zero</code> (VM::ConstRefZeroOp)","text":"<p>Null ref constant operation</p> <p>Syntax:</p> 
<pre><code>operation ::= `vm.const.ref.zero` `:` type($result) attr-dict\n</code></pre> <p>Defines a constant null ref that can be used in comparisons and initialization.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_88","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmrodatainline-vmrodatainlineop","title":"<code>vm.rodata.inline</code> (VM::RodataInlineOp)","text":"<p>Inlined constant rodata</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rodata.inline` ($name^)? attr-dict `:` type($result) `=` $value\n</code></pre> <p>vm.rodata that can be embedded inline in functions. See vm.rodata for more information.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::Attributebuffer-like constant attribute values <code>alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#results_89","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmrodata-vmrodataop","title":"<code>vm.rodata</code> (VM::RodataOp)","text":"<p>Read-only data definition operation</p> <p>Syntax:</p> 
<pre><code>operation ::= `vm.rodata` custom&lt;SymbolVisibility&gt;($sym_visibility) $sym_name attr-dict $value\n</code></pre> <p>Defines a blob of read-only constant data that can be represented as a ref. This can be used to store arbitrary data within modules such as large constant buffers and other file contents.</p> <p>Note that the data is reference counted as a way to track its usage once the value leaves the module. For example, returning rodata from an exported function must keep the data (possibly backed by mmap) valid for its entire lifetime.</p> <p>By default all rodata will be aligned in the final module output at a 16-byte granularity. An optional alignment can be specified to override the default for cases where larger or smaller alignments are needed.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::Attributebuffer-like constant attribute values <code>alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>ordinal</code>::mlir::IntegerAttrordinal value <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#vmrodatatableinline-vmrodatatableinlineop","title":"<code>vm.rodata.table.inline</code> (VM::RodataTableInlineOp)","text":"<p>Inlined constant rodata table</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rodata.table.inline` $table_type attr-dict `:` type($table_result) `,` type($data_result) `=` $data_array\n</code></pre> <p>vm.rodata with another associated vm.rodata table specifying byte offsets and sizes as a subview into the flattened data. 
The table is a flat array of 32 or 64-bit integers storing (offset, size) in element order.</p> <p>The optional alignment attribute applies to both the table and data rodata. The data_alignment attribute can be used to specify an alignment for the elements of the table, padding to the data alignment with zeros. The element sizes reflect the unpadded attribute storage sizes.</p> <p>See vm.rodata for more information.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>table_name</code>::mlir::StringAttrstring attribute <code>data_name</code>::mlir::StringAttrstring attribute <code>table_type</code>::mlir::TypeAttrtype attribute of 32/64-bit integer <code>data_array</code>::mlir::ArrayAttrarray attribute of serializable attributes <code>alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>data_alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#results_90","title":"Results:","text":"Result Description <code>table_result</code> ref <code>data_result</code> ref"},{"location":"reference/mlir-dialects/VM/#control-flow-ops","title":"Control flow ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbr-vmbranchop","title":"<code>vm.br</code> (VM::BranchOp)","text":"<p>Unconditional branch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.br` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? 
attr-dict\n</code></pre> <p>Represents an unconditional branch operation that branches to a target block  with the given set of arguments.</p> <p><code>^bb0(...):    vm.br ^bb1(%a)  ^bb1(%blockArg1):    ...</code></p> <p>Traits: <code>Terminator</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_93","title":"Operands:","text":"Operand Description <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_1","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmbr_table-vmbranchtableop","title":"<code>vm.br_table</code> (VM::BranchTableOp)","text":"<p>Branch table operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.br_table` $index ` ` `{` `\\n`\n              custom&lt;BranchTableCases&gt;(\n              $defaultDestination, $defaultOperands, type($defaultOperands),\n              $caseDestinations, $caseOperands, type($caseOperands))\n              `}`\n              attr-dict\n</code></pre> <p>Represents a branch table instructing execution to branch to the block with  the specified index. 
If the index is out of bounds then execution will  branch to the default block.</p> <p><code>vm.br_table %index {    default: ^bb1(%a : i64),    0: ^bb2,    1: ^bb3(%c : i64)  }</code></p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Terminator</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>case_operand_segments</code>::mlir::DenseI32ArrayAttri32 dense array attribute"},{"location":"reference/mlir-dialects/VM/#operands_94","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>defaultOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>caseOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_2","title":"Successors:","text":"Successor Description <code>defaultDestination</code> any successor <code>caseDestinations</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmcall-vmcallop","title":"<code>vm.call</code> (VM::CallOp)","text":"<p>Call operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.call` $callee `(` operands `)` attr-dict `:` functional-type(operands, results)\n</code></pre> <p>Calls an internal VM function with the given arguments.</p> <p>Interfaces: <code>CallOpInterface</code>, <code>MemoryEffectOpInterface</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_95","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_91","title":"Results:","text":"Result Description <code>results</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcallvariadic-vmcallvariadicop","title":"<code>vm.call.variadic</code> (VM::CallVariadicOp)","text":"<p>Call operation with variadic arguments</p> <p>Calls an internal VM function with the given arguments. One or more of the arguments may be variadic, encoded as segmented sized operand lists.</p> <p>Variadic arguments must be specified with a total count in the segment_sizes attribute.</p> <p>Interfaces: <code>CallOpInterface</code>, <code>MemoryEffectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>FlatSymbolRefAttrsymbol reference attribute <code>segment_sizes</code>::mlir::DenseIntElementsAttr16-bit signless integer elements attribute <code>segment_types</code>::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/VM/#operands_96","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_92","title":"Results:","text":"Result Description 
<code>results</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckeq-vmcheckeqop","title":"<code>vm.check.eq</code> (VM::CheckEQOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>Commutative</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_97","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>rhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckne-vmcheckneop","title":"<code>vm.check.ne</code> (VM::CheckNEOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.ne` $lhs `,` $rhs (`,` $message^)? 
attr-dict `:` type($lhs)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>Commutative</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_98","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>rhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknz-vmchecknzop","title":"<code>vm.check.nz</code> (VM::CheckNZOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.nz` $value (`,` $message^)? attr-dict `:` type($value)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. 
The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_99","title":"Operands:","text":"Operand Description <code>value</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknearly_eq-vmchecknearlyeqop","title":"<code>vm.check.nearly_eq</code> (VM::CheckNearlyEQOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.nearly_eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. 
The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>Commutative</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_100","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>rhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcond_br-vmcondbranchop","title":"<code>vm.cond_br</code> (VM::CondBranchOp)","text":"<p>Conditional branch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cond_br` $condition `,`\n              $trueDest (`(` $trueDestOperands^ `:` type($trueDestOperands) `)`)? `,`\n              $falseDest (`(` $falseDestOperands^ `:` type($falseDestOperands) `)`)?\n              attr-dict\n</code></pre> <p>Represents a conditional branch operation that branches to one of the two  target blocks with the given set of arguments.</p> <p><code>^bb0(...):    vm.cond_br %condition, ^bb1(%a), ^bb2(%b)  ^bb1(%blockArg1):    ...  
^bb2(%blockArg2):    ...</code></p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Terminator</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_101","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>trueDestOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>falseDestOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_3","title":"Successors:","text":"Successor Description <code>trueDest</code> any successor <code>falseDest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_fail-vmcondfailop","title":"<code>vm.cond_fail</code> (VM::CondFailOp)","text":"<p>Raises a global failure if the condition is true</p> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. 
It's also possible that other work that is in-flight or pending when the failure occurs will complete.</p> <p>This is implemented as a pseudo-op that transforms into a vm.fail operation guarded by the condition.</p> <pre><code>%nz = vm.cmp.nz.i32 %value : i32\n%statusCode = vm.const.i32 9\nvm.cond_fail %nz, %statusCode, \"expected non-zero\"\n</code></pre> <p>Traits: <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_102","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfail-vmfailop","title":"<code>vm.fail</code> (VM::FailOp)","text":"<p>Raises a global failure</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fail` $status (`,` $message^)? attr-dict\n</code></pre> <p>Signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. 
It's also possible that other work that is in-flight or pending when the failure occurs will complete.</p> <pre><code>%statusCode = vm.const.i32 9\nvm.fail %statusCode, \"oh no!\"\n</code></pre> <p>Traits: <code>Terminator</code></p> <p>Interfaces: <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_103","title":"Operands:","text":"Operand Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmimportresolved-vmimportresolvedop","title":"<code>vm.import.resolved</code> (VM::ImportResolvedOp)","text":"<p>Returns true if an optional import was resolved at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `vm.import.resolved` $import attr-dict `:` type($result)\n</code></pre> <p>Allows for checking whether an optional import was resolved at runtime. 
If this returns false then attempting to call the imported function will result in a failure at runtime.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>import</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_93","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmreturn-vmreturnop","title":"<code>vm.return</code> (VM::ReturnOp)","text":"<p>Return operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.return` attr-dict ($operands^ `:` type($operands))?\n</code></pre> <p>Represents a return operation within a function.</p> <pre><code>vm.func @foo(%0: i32, %1: f8) -&gt; (i32, f8) {\n  vm.return %0, %1 : i32, f8\n}\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_104","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#debugging-ops","title":"Debugging ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbreak-vmbreakop","title":"<code>vm.break</code> (VM::BreakOp)","text":"<p>Unconditional debug break operation</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.break` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n</code></pre> <p>Breaks into the attached debugger or asks for attaching a debugger. After resuming (or if a debugger is not attached) execution will continue at the target block.</p> <p>Traits: <code>Terminator</code>, <code>Util_YieldPoint</code>, <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_105","title":"Operands:","text":"Operand Description <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_4","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_break-vmcondbreakop","title":"<code>vm.cond_break</code> (VM::CondBreakOp)","text":"<p>Conditional debug break operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cond_break` $condition `,` $dest (`(` $destOperands^ `:` type($destOperands) `)`)?\n              attr-dict\n</code></pre> <p>Breaks into the attached debugger or asks for attaching a debugger if the provided condition is true. 
After resuming (or if a debugger is not attached) execution will continue at the target block.</p> <p>Traits: <code>Terminator</code>, <code>Util_YieldPoint</code>, <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_106","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_5","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmprint-vmprintop","title":"<code>vm.print</code> (VM::PrintOp)","text":"<p>Message printing operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.print` $message `(` operands `)` attr-dict `:` type(operands)\n</code></pre> <p>Prints the given string message and zero or more values.</p> <p>Traits: <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_107","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmtrace-vmtraceop","title":"<code>vm.trace</code> (VM::TraceOp)","text":"<p>Trace value(s) operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trace` $event_name `(` operands `)` attr-dict `:` type(operands)\n</code></pre> <p>Traces one or more values at 
the time the operation is executed. These values will be encoded into the active trace depending on the active trace verbosity setting.</p> <p>Traits: <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>event_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_108","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#floating-point-arithmetic-ops","title":"Floating-point arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsf32-vmabsf32op","title":"<code>vm.abs.f32</code> (VM::AbsF32Op)","text":"<p>Floating point absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_109","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_94","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmabsf64-vmabsf64op","title":"<code>vm.abs.f64</code> (VM::AbsF64Op)","text":"<p>Floating point absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_110","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_95","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf32-vmaddf32op","title":"<code>vm.add.f32</code> (VM::AddF32Op)","text":"<p>Floating-point add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_111","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_96","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf64-vmaddf64op","title":"<code>vm.add.f64</code> (VM::AddF64Op)","text":"<p>Floating-point add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_112","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_97","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf32-vmceilf32op","title":"<code>vm.ceil.f32</code> (VM::CeilF32Op)","text":"<p>Floating point ceiling operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ceil.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_113","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_98","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf64-vmceilf64op","title":"<code>vm.ceil.f64</code> (VM::CeilF64Op)","text":"<p>Floating point ceiling operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ceil.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_114","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_99","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf32-vmdivf32op","title":"<code>vm.div.f32</code> (VM::DivF32Op)","text":"<p>Floating point division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_115","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_100","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf64-vmdivf64op","title":"<code>vm.div.f64</code> (VM::DivF64Op)","text":"<p>Floating point division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_116","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_101","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf32-vmfmaf32op","title":"<code>vm.fma.f32</code> 
(VM::FMAF32Op)","text":"<p>Floating point fused multiply-add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_117","title":"Operands:","text":"Operand Description <code>a</code> 32-bit float <code>b</code> 32-bit float <code>c</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_102","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf64-vmfmaf64op","title":"<code>vm.fma.f64</code> (VM::FMAF64Op)","text":"<p>Floating point fused multiply-add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_118","title":"Operands:","text":"Operand Description <code>a</code> 64-bit float <code>b</code> 64-bit float <code>c</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_103","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf32-vmfloorf32op","title":"<code>vm.floor.f32</code> (VM::FloorF32Op)","text":"<p>Floating point floor operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.floor.f32` $operand attr-dict `:` 
type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_119","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_104","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf64-vmfloorf64op","title":"<code>vm.floor.f64</code> (VM::FloorF64Op)","text":"<p>Floating point floor operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.floor.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_120","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_105","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf32-vmmaxf32op","title":"<code>vm.max.f32</code> (VM::MaxF32Op)","text":"<p>Floating point maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_121","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_106","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf64-vmmaxf64op","title":"<code>vm.max.f64</code> (VM::MaxF64Op)","text":"<p>Floating point maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_122","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_107","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf32-vmminf32op","title":"<code>vm.min.f32</code> (VM::MinF32Op)","text":"<p>Floating point minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_123","title":"Operands:","text":"Operand Description 
<code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_108","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf64-vmminf64op","title":"<code>vm.min.f64</code> (VM::MinF64Op)","text":"<p>Floating point minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_124","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_109","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmulf32-vmmulf32op","title":"<code>vm.mul.f32</code> (VM::MulF32Op)","text":"<p>Floating point multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_125","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_110","title":"Results:","text":"Result Description <code>result</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmmulf64-vmmulf64op","title":"<code>vm.mul.f64</code> (VM::MulF64Op)","text":"<p>Floating point multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_126","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_111","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf32-vmnegf32op","title":"<code>vm.neg.f32</code> (VM::NegF32Op)","text":"<p>Floating point negation operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.neg.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_127","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_112","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf64-vmnegf64op","title":"<code>vm.neg.f64</code> (VM::NegF64Op)","text":"<p>Floating point negation operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.neg.f64` $operand attr-dict `:` 
type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_128","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_113","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf32-vmremf32op","title":"<code>vm.rem.f32</code> (VM::RemF32Op)","text":"<p>Floating point remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_129","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_114","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf64-vmremf64op","title":"<code>vm.rem.f64</code> (VM::RemF64Op)","text":"<p>Floating point remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_130","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_115","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32even-vmroundf32evenop","title":"<code>vm.round.f32.even</code> (VM::RoundF32EvenOp)","text":"<p>Rounds the value to the nearest even integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f32.even` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_131","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_116","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32-vmroundf32op","title":"<code>vm.round.f32</code> (VM::RoundF32Op)","text":"<p>Rounds the value to the nearest integer away from zero</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_132","title":"Operands:","text":"Operand Description 
<code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_117","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64even-vmroundf64evenop","title":"<code>vm.round.f64.even</code> (VM::RoundF64EvenOp)","text":"<p>Rounds the value to the nearest even integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f64.even` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_133","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_118","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64-vmroundf64op","title":"<code>vm.round.f64</code> (VM::RoundF64Op)","text":"<p>Rounds the value to the nearest integer away from zero</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_134","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_119","title":"Results:","text":"Result Description <code>result</code> 64-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmsubf32-vmsubf32op","title":"<code>vm.sub.f32</code> (VM::SubF32Op)","text":"<p>Floating point subtraction operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_135","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_120","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsubf64-vmsubf64op","title":"<code>vm.sub.f64</code> (VM::SubF64Op)","text":"<p>Floating point subtraction operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_136","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_121","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#floating-point-comparison-ops","title":"Floating-point comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32near-vmcmpeqf32nearop","title":"<code>vm.cmp.eq.f32.near</code> 
(VM::CmpEQF32NearOp)","text":"<p>Near floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f32.near` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_137","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_122","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32o-vmcmpeqf32oop","title":"<code>vm.cmp.eq.f32.o</code> (VM::CmpEQF32OOp)","text":"<p>Ordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_138","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_123","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32u-vmcmpeqf32uop","title":"<code>vm.cmp.eq.f32.u</code> (VM::CmpEQF32UOp)","text":"<p>Unordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_139","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_124","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64near-vmcmpeqf64nearop","title":"<code>vm.cmp.eq.f64.near</code> (VM::CmpEQF64NearOp)","text":"<p>Near floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f64.near` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_140","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_125","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64o-vmcmpeqf64oop","title":"<code>vm.cmp.eq.f64.o</code> (VM::CmpEQF64OOp)","text":"<p>Ordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_141","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_126","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64u-vmcmpeqf64uop","title":"<code>vm.cmp.eq.f64.u</code> (VM::CmpEQF64UOp)","text":"<p>Unordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_142","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_127","title":"Results:","text":"Result 
Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32o-vmcmpgtef32oop","title":"<code>vm.cmp.gte.f32.o</code> (VM::CmpGTEF32OOp)","text":"<p>Ordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_143","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_128","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32u-vmcmpgtef32uop","title":"<code>vm.cmp.gte.f32.u</code> (VM::CmpGTEF32UOp)","text":"<p>Unordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_144","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_129","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64o-vmcmpgtef64oop","title":"<code>vm.cmp.gte.f64.o</code> (VM::CmpGTEF64OOp)","text":"<p>Ordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_145","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_130","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64u-vmcmpgtef64uop","title":"<code>vm.cmp.gte.f64.u</code> (VM::CmpGTEF64UOp)","text":"<p>Unordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_146","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_131","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32o-vmcmpgtf32oop","title":"<code>vm.cmp.gt.f32.o</code> (VM::CmpGTF32OOp)","text":"<p>Ordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_147","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_132","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32u-vmcmpgtf32uop","title":"<code>vm.cmp.gt.f32.u</code> (VM::CmpGTF32UOp)","text":"<p>Unordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_148","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_133","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64o-vmcmpgtf64oop","title":"<code>vm.cmp.gt.f64.o</code> (VM::CmpGTF64OOp)","text":"<p>Ordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_149","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_134","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64u-vmcmpgtf64uop","title":"<code>vm.cmp.gt.f64.u</code> (VM::CmpGTF64UOp)","text":"<p>Unordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_150","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_135","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32o-vmcmpltef32oop","title":"<code>vm.cmp.lte.f32.o</code> (VM::CmpLTEF32OOp)","text":"<p>Ordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_151","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_136","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32u-vmcmpltef32uop","title":"<code>vm.cmp.lte.f32.u</code> (VM::CmpLTEF32UOp)","text":"<p>Unordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_152","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_137","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64o-vmcmpltef64oop","title":"<code>vm.cmp.lte.f64.o</code> (VM::CmpLTEF64OOp)","text":"<p>Ordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_153","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_138","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64u-vmcmpltef64uop","title":"<code>vm.cmp.lte.f64.u</code> (VM::CmpLTEF64UOp)","text":"<p>Unordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_154","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_139","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32o-vmcmpltf32oop","title":"<code>vm.cmp.lt.f32.o</code> (VM::CmpLTF32OOp)","text":"<p>Ordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_155","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_140","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32u-vmcmpltf32uop","title":"<code>vm.cmp.lt.f32.u</code> (VM::CmpLTF32UOp)","text":"<p>Unordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_156","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_141","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64o-vmcmpltf64oop","title":"<code>vm.cmp.lt.f64.o</code> (VM::CmpLTF64OOp)","text":"<p>Ordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_157","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_142","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64u-vmcmpltf64uop","title":"<code>vm.cmp.lt.f64.u</code> (VM::CmpLTF64UOp)","text":"<p>Unordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_158","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_143","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32o-vmcmpnef32oop","title":"<code>vm.cmp.ne.f32.o</code> (VM::CmpNEF32OOp)","text":"<p>Ordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_159","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_144","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32u-vmcmpnef32uop","title":"<code>vm.cmp.ne.f32.u</code> (VM::CmpNEF32UOp)","text":"<p>Unordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_160","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_145","title":"Results:","text":"Result Description <code>result</code> 
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64o-vmcmpnef64oop","title":"<code>vm.cmp.ne.f64.o</code> (VM::CmpNEF64OOp)","text":"<p>Ordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_161","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_146","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64u-vmcmpnef64uop","title":"<code>vm.cmp.ne.f64.u</code> (VM::CmpNEF64UOp)","text":"<p>Unordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_162","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_147","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32o-vmcmpnzf32oop","title":"<code>vm.cmp.nz.f32.o</code> (VM::CmpNZF32OOp)","text":"<p>Ordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f32.o` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_163","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_148","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32u-vmcmpnzf32uop","title":"<code>vm.cmp.nz.f32.u</code> (VM::CmpNZF32UOp)","text":"<p>Unordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f32.u` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_164","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_149","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64o-vmcmpnzf64oop","title":"<code>vm.cmp.nz.f64.o</code> (VM::CmpNZF64OOp)","text":"<p>Ordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f64.o` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_165","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_150","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64u-vmcmpnzf64uop","title":"<code>vm.cmp.nz.f64.u</code> (VM::CmpNZF64UOp)","text":"<p>Unordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f64.u` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_166","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_151","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf32-vmcmpnanf32op","title":"<code>vm.cmp.nan.f32</code> (VM::CmpNaNF32Op)","text":"<p>Floating-point NaN comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nan.f32` $operand attr-dict `:` type($operand)\n</code></pre> <p>Returns 1 if the value is NaN.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_167","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_152","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf64-vmcmpnanf64op","title":"<code>vm.cmp.nan.f64</code> (VM::CmpNaNF64Op)","text":"<p>Floating-point NaN comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nan.f64` $operand attr-dict `:` type($operand)\n</code></pre> <p>Returns 1 if the value is NaN.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_168","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_153","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#floating-point-math-ops","title":"Floating-point math ops","text":"<p>These map directly to the 
<code>math</code> dialect.</p>"},{"location":"reference/mlir-dialects/VM/#vmatan2f32-vmatan2f32op","title":"<code>vm.atan2.f32</code> (VM::Atan2F32Op)","text":"<p>2-argument arcus tangent of the given values</p> <p>Syntax:</p> <pre><code>operation ::= `vm.atan2.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_169","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_154","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatan2f64-vmatan2f64op","title":"<code>vm.atan2.f64</code> (VM::Atan2F64Op)","text":"<p>2-argument arcus tangent of the given values</p> <p>Syntax:</p> <pre><code>operation ::= `vm.atan2.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_170","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_155","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf32-vmatanf32op","title":"<code>vm.atan.f32</code> (VM::AtanF32Op)","text":"<p>Arcus tangent of the given value</p> <p>Syntax:</p> 
<pre><code>operation ::= `vm.atan.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_171","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_156","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf64-vmatanf64op","title":"<code>vm.atan.f64</code> (VM::AtanF64Op)","text":"<p>Arcus tangent of the given value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.atan.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_172","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_157","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf32-vmcosf32op","title":"<code>vm.cos.f32</code> (VM::CosF32Op)","text":"<p>Cosine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cos.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_173","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_158","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf64-vmcosf64op","title":"<code>vm.cos.f64</code> (VM::CosF64Op)","text":"<p>Cosine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cos.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_174","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_159","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff32-vmerff32op","title":"<code>vm.erf.f32</code> (VM::ErfF32Op)","text":"<p>Computes the error function of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.erf.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_175","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_160","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff64-vmerff64op","title":"<code>vm.erf.f64</code> (VM::ErfF64Op)","text":"<p>Computes the error function of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.erf.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_176","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_161","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f32-vmexp2f32op","title":"<code>vm.exp2.f32</code> (VM::Exp2F32Op)","text":"<p>Base-2 exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp2.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_177","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_162","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f64-vmexp2f64op","title":"<code>vm.exp2.f64</code> (VM::Exp2F64Op)","text":"<p>Base-2 
exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp2.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_178","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_163","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf32-vmexpf32op","title":"<code>vm.exp.f32</code> (VM::ExpF32Op)","text":"<p>Base-e exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_179","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_164","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf64-vmexpf64op","title":"<code>vm.exp.f64</code> (VM::ExpF64Op)","text":"<p>Base-e exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_180","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_165","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f32-vmexpm1f32op","title":"<code>vm.expm1.f32</code> (VM::ExpM1F32Op)","text":"<p>Base-e exponential of the specified value minus 1</p> <p>Syntax:</p> <pre><code>operation ::= `vm.expm1.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_181","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_166","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f64-vmexpm1f64op","title":"<code>vm.expm1.f64</code> (VM::ExpM1F64Op)","text":"<p>Base-e exponential of the specified value minus 1</p> <p>Syntax:</p> <pre><code>operation ::= `vm.expm1.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_182","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_167","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f32-vmlog10f32op","title":"<code>vm.log10.f32</code> (VM::Log10F32Op)","text":"<p>Base-10 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log10.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_183","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_168","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f64-vmlog10f64op","title":"<code>vm.log10.f64</code> (VM::Log10F64Op)","text":"<p>Base-10 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log10.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_184","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_169","title":"Results:","text":"Result 
Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf32-vmlog1pf32op","title":"<code>vm.log1p.f32</code> (VM::Log1pF32Op)","text":"<p>Natural logarithm of one plus the given value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log1p.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_185","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_170","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf64-vmlog1pf64op","title":"<code>vm.log1p.f64</code> (VM::Log1pF64Op)","text":"<p>Natural logarithm of one plus the given value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log1p.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_186","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_171","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f32-vmlog2f32op","title":"<code>vm.log2.f32</code> (VM::Log2F32Op)","text":"<p>Base-2 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.log2.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_187","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_172","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f64-vmlog2f64op","title":"<code>vm.log2.f64</code> (VM::Log2F64Op)","text":"<p>Base-2 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log2.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_188","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_173","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf32-vmlogf32op","title":"<code>vm.log.f32</code> (VM::LogF32Op)","text":"<p>Base-e logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_189","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_174","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf64-vmlogf64op","title":"<code>vm.log.f64</code> (VM::LogF64Op)","text":"<p>Base-e logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_190","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_175","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf32-vmpowf32op","title":"<code>vm.pow.f32</code> (VM::PowF32Op)","text":"<p>Floating point raised to the power of operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.pow.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_191","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_176","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf64-vmpowf64op","title":"<code>vm.pow.f64</code> (VM::PowF64Op)","text":"<p>Floating point raised to the power of operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.pow.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_192","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_177","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf32-vmrsqrtf32op","title":"<code>vm.rsqrt.f32</code> (VM::RsqrtF32Op)","text":"<p>Reciprocal of sqrt (1 / sqrt of the specified value)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rsqrt.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_193","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_178","title":"Results:","text":"Result Description <code>result</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf64-vmrsqrtf64op","title":"<code>vm.rsqrt.f64</code> (VM::RsqrtF64Op)","text":"<p>Reciprocal of sqrt (1 / sqrt of the specified value)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rsqrt.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_194","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_179","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf32-vmsinf32op","title":"<code>vm.sin.f32</code> (VM::SinF32Op)","text":"<p>Sine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sin.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_195","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_180","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf64-vmsinf64op","title":"<code>vm.sin.f64</code> (VM::SinF64Op)","text":"<p>Sine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sin.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_196","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_181","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf32-vmsqrtf32op","title":"<code>vm.sqrt.f32</code> (VM::SqrtF32Op)","text":"<p>Sqrt of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sqrt.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_197","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_182","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf64-vmsqrtf64op","title":"<code>vm.sqrt.f64</code> (VM::SqrtF64Op)","text":"<p>Sqrt of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sqrt.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_198","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_183","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf32-vmtanhf32op","title":"<code>vm.tanh.f32</code> (VM::TanhF32Op)","text":"<p>Hyperbolic tangent of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.tanh.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_199","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_184","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf64-vmtanhf64op","title":"<code>vm.tanh.f64</code> (VM::TanhF64Op)","text":"<p>Hyperbolic tangent of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.tanh.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_200","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_185","title":"Results:","text":"Result 
Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/VM/#vmglobaladdress-vmglobaladdressop","title":"<code>vm.global.address</code> (VM::GlobalAddressOp)","text":"<p>Returns an address reference to a global</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.address` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($result)\n</code></pre> <p>Returns an indirect address reference to the given global. During export the address will be converted to the natural format of the global table (for example, ordinals for refs and byte offsets for primitive types).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalAddressOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_186","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or a pointer-like reference"},{"location":"reference/mlir-dialects/VM/#vmglobalf32-vmglobalf32op","title":"<code>vm.global.f32</code> (VM::GlobalF32Op)","text":"<p>32-bit floating-point global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.f32` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>FloatAttr32-bit floating-point value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalf64-vmglobalf64op","title":"<code>vm.global.f64</code> (VM::GlobalF64Op)","text":"<p>64-bit floating-point global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.f64` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>FloatAttr64-bit floating-point value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali32-vmglobali32op","title":"<code>vm.global.i32</code> (VM::GlobalI32Op)","text":"<p>32-bit integer global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.i32` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>IntegerAttr32-bit integer value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali64-vmglobali64op","title":"<code>vm.global.i64</code> (VM::GlobalI64Op)","text":"<p>64-bit integer global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.i64` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>IntegerAttr64-bit integer value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf32-vmgloballoadf32op","title":"<code>vm.global.load.f32</code> (VM::GlobalLoadF32Op)","text":"<p>Global 32-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.f32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_187","title":"Results:","text":"Result Description <code>value</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf64-vmgloballoadf64op","title":"<code>vm.global.load.f64</code> (VM::GlobalLoadF64Op)","text":"<p>Global 64-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.f64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_188","title":"Results:","text":"Result Description <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi32-vmgloballoadi32op","title":"<code>vm.global.load.i32</code> (VM::GlobalLoadI32Op)","text":"<p>Global 32-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.i32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit 
attribute"},{"location":"reference/mlir-dialects/VM/#results_189","title":"Results:","text":"Result Description <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi64-vmgloballoadi64op","title":"<code>vm.global.load.i64</code> (VM::GlobalLoadI64Op)","text":"<p>Global 64-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.i64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_190","title":"Results:","text":"Result Description <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf32-vmgloballoadindirectf32op","title":"<code>vm.global.load.indirect.f32</code> (VM::GlobalLoadIndirectF32Op)","text":"<p>Global 32-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.f32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit 
attribute"},{"location":"reference/mlir-dialects/VM/#operands_201","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;32-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#results_191","title":"Results:","text":"Result Description <code>value</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf64-vmgloballoadindirectf64op","title":"<code>vm.global.load.indirect.f64</code> (VM::GlobalLoadIndirectF64Op)","text":"<p>Global 64-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.f64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_202","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;64-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#results_192","title":"Results:","text":"Result Description <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti32-vmgloballoadindirecti32op","title":"<code>vm.global.load.indirect.i32</code> (VM::GlobalLoadIndirectI32Op)","text":"<p>Global 32-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.i32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_203","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;32-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#results_193","title":"Results:","text":"Result Description <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti64-vmgloballoadindirecti64op","title":"<code>vm.global.load.indirect.i64</code> (VM::GlobalLoadIndirectI64Op)","text":"<p>Global 64-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.i64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_204","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;64-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#results_194","title":"Results:","text":"Result Description <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectref-vmgloballoadindirectrefop","title":"<code>vm.global.load.indirect.ref</code> (VM::GlobalLoadIndirectRefOp)","text":"<p>Global ref load operation <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.ref` (`immutable` $is_immutable^)?\n      
        $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a ref of the given type.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_205","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#results_195","title":"Results:","text":"Result Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmgloballoadref-vmgloballoadrefop","title":"<code>vm.global.load.ref</code> (VM::GlobalLoadRefOp)","text":"<p>Global ref load operation <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.ref` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a ref of the given type.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_196","title":"Results:","text":"Result Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmglobalref-vmglobalrefop","title":"<code>vm.global.ref</code> (VM::GlobalRefOp)","text":"<p>Ref global declaration <p>Syntax:</p> <pre><code>operation ::= `vm.global.ref` custom&lt;SymbolVisibility&gt;($sym_visibility)\n           
   (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              `:` $type\n</code></pre> <p>Defines a global value that is a ref of a specific type. The global will retain the ref object for the lifetime of the context or until the value is replaced with a store or reset. Initialized to null unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref32-vmglobalstoref32op","title":"<code>vm.global.store.f32</code> (VM::GlobalStoreF32Op)","text":"<p>Global 32-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.f32` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value value to a global.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_206","title":"Operands:","text":"Operand Description <code>value</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref64-vmglobalstoref64op","title":"<code>vm.global.store.f64</code> (VM::GlobalStoreF64Op)","text":"<p>Global 64-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.f64` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_207","title":"Operands:","text":"Operand Description <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei32-vmglobalstorei32op","title":"<code>vm.global.store.i32</code> (VM::GlobalStoreI32Op)","text":"<p>Global 32-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.i32` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_208","title":"Operands:","text":"Operand Description <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei64-vmglobalstorei64op","title":"<code>vm.global.store.i64</code> (VM::GlobalStoreI64Op)","text":"<p>Global 64-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.global.store.i64` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_209","title":"Operands:","text":"Operand Description <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf32-vmglobalstoreindirectf32op","title":"<code>vm.global.store.indirect.f32</code> (VM::GlobalStoreIndirectF32Op)","text":"<p>Global 32-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.f32` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_210","title":"Operands:","text":"Operand Description <code>value</code> 32-bit float <code>global</code> 32-bit signless integer or ptr&lt;32-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf64-vmglobalstoreindirectf64op","title":"<code>vm.global.store.indirect.f64</code> (VM::GlobalStoreIndirectF64Op)","text":"<p>Global 64-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.f64` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_211","title":"Operands:","text":"Operand Description <code>value</code> 64-bit float <code>global</code> 32-bit signless integer or ptr&lt;64-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti32-vmglobalstoreindirecti32op","title":"<code>vm.global.store.indirect.i32</code> (VM::GlobalStoreIndirectI32Op)","text":"<p>Global 32-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.i32` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_212","title":"Operands:","text":"Operand Description <code>value</code> 32-bit signless integer <code>global</code> 32-bit signless integer or ptr&lt;32-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti64-vmglobalstoreindirecti64op","title":"<code>vm.global.store.indirect.i64</code> (VM::GlobalStoreIndirectI64Op)","text":"<p>Global 64-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.i64` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_213","title":"Operands:","text":"Operand Description <code>value</code> 64-bit signless integer <code>global</code> 32-bit signless integer or ptr&lt;64-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectref-vmglobalstoreindirectrefop","title":"<code>vm.global.store.indirect.ref</code> 
(VM::GlobalStoreIndirectRefOp)","text":"<p>Global ref store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.ref` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a ref to a global, retaining it until the global is reset.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_214","title":"Operands:","text":"Operand Description <code>value</code> ref <code>global</code> 32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreref-vmglobalstorerefop","title":"<code>vm.global.store.ref</code> (VM::GlobalStoreRefOp)","text":"<p>Global ref store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.ref` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a ref to a global, retaining it until the global is reset.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_215","title":"Operands:","text":"Operand Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#integer-arithmetic-ops","title":"Integer arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsi32-vmabsi32op","title":"<code>vm.abs.i32</code> (VM::AbsI32Op)","text":"<p>Integer absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.i32` $operand attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_216","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_197","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmabsi64-vmabsi64op","title":"<code>vm.abs.i64</code> (VM::AbsI64Op)","text":"<p>Integer absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.i64` $operand attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_217","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_198","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi32-vmaddi32op","title":"<code>vm.add.i32</code> (VM::AddI32Op)","text":"<p>Integer add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_218","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_199","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi64-vmaddi64op","title":"<code>vm.add.i64</code> 
(VM::AddI64Op)","text":"<p>Integer add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_219","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_200","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32s-vmdivi32sop","title":"<code>vm.div.i32.s</code> (VM::DivI32SOp)","text":"<p>Signed integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_220","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_201","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32u-vmdivi32uop","title":"<code>vm.div.i32.u</code> (VM::DivI32UOp)","text":"<p>Unsigned integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_221","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_202","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64s-vmdivi64sop","title":"<code>vm.div.i64.s</code> (VM::DivI64SOp)","text":"<p>Signed integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_222","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_203","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64u-vmdivi64uop","title":"<code>vm.div.i64.u</code> (VM::DivI64UOp)","text":"<p>Unsigned integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_223","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_204","title":"Results:","text":"Result Description <code>result</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmfmai32-vmfmai32op","title":"<code>vm.fma.i32</code> (VM::FMAI32Op)","text":"<p>Integer fused-multiply add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_224","title":"Operands:","text":"Operand Description <code>a</code> 32-bit signless integer <code>b</code> 32-bit signless integer <code>c</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_205","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfmai64-vmfmai64op","title":"<code>vm.fma.i64</code> (VM::FMAI64Op)","text":"<p>Integer fused-multiply add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_225","title":"Operands:","text":"Operand Description <code>a</code> 64-bit signless integer <code>b</code> 64-bit signless integer <code>c</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_206","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32s-vmmaxi32sop","title":"<code>vm.max.i32.s</code> (VM::MaxI32SOp)","text":"<p>Signed integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_226","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_207","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32u-vmmaxi32uop","title":"<code>vm.max.i32.u</code> (VM::MaxI32UOp)","text":"<p>Unsigned integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_227","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_208","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64s-vmmaxi64sop","title":"<code>vm.max.i64.s</code> (VM::MaxI64SOp)","text":"<p>Signed integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_228","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_209","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64u-vmmaxi64uop","title":"<code>vm.max.i64.u</code> (VM::MaxI64UOp)","text":"<p>Unsigned integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_229","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_210","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32s-vmmini32sop","title":"<code>vm.min.i32.s</code> (VM::MinI32SOp)","text":"<p>Signed integer minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_230","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_211","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32u-vmmini32uop","title":"<code>vm.min.i32.u</code> (VM::MinI32UOp)","text":"<p>Unsigned integer minimum 
operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_231","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_212","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64s-vmmini64sop","title":"<code>vm.min.i64.s</code> (VM::MinI64SOp)","text":"<p>Signed integer minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_232","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_213","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64u-vmmini64uop","title":"<code>vm.min.i64.u</code> (VM::MinI64UOp)","text":"<p>Unsigned integer minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_233","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_214","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli32-vmmuli32op","title":"<code>vm.mul.i32</code> (VM::MulI32Op)","text":"<p>Integer multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_234","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_215","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli64-vmmuli64op","title":"<code>vm.mul.i64</code> (VM::MulI64Op)","text":"<p>Integer multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_235","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_216","title":"Results:","text":"Result Description <code>result</code> 
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32s-vmremi32sop","title":"<code>vm.rem.i32.s</code> (VM::RemI32SOp)","text":"<p>Signed integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_236","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_217","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32u-vmremi32uop","title":"<code>vm.rem.i32.u</code> (VM::RemI32UOp)","text":"<p>Unsigned integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_237","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_218","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64s-vmremi64sop","title":"<code>vm.rem.i64.s</code> (VM::RemI64SOp)","text":"<p>Signed integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_238","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_219","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64u-vmremi64uop","title":"<code>vm.rem.i64.u</code> (VM::RemI64UOp)","text":"<p>Unsigned integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_239","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_220","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi32-vmsubi32op","title":"<code>vm.sub.i32</code> (VM::SubI32Op)","text":"<p>Integer subtract operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_240","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_221","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi64-vmsubi64op","title":"<code>vm.sub.i64</code> (VM::SubI64Op)","text":"<p>Integer subtract operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_241","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_222","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#integer-bit-manipulation-ops","title":"Integer bit manipulation ops","text":""},{"location":"reference/mlir-dialects/VM/#vmandi32-vmandi32op","title":"<code>vm.and.i32</code> (VM::AndI32Op)","text":"<p>Integer binary and operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.and.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_242","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_223","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmandi64-vmandi64op","title":"<code>vm.and.i64</code> (VM::AndI64Op)","text":"<p>Integer binary and 
operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.and.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_243","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_224","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi32-vmctlzi32op","title":"<code>vm.ctlz.i32</code> (VM::CtlzI32Op)","text":"<p>Counts the leading zeros in an integer value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ctlz.i32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_244","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_225","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi64-vmctlzi64op","title":"<code>vm.ctlz.i64</code> (VM::CtlzI64Op)","text":"<p>Counts the leading zeros in an integer value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ctlz.i64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: 
<code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_245","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_226","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti32-vmnoti32op","title":"<code>vm.not.i32</code> (VM::NotI32Op)","text":"<p>Integer binary not operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.not.i32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_246","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_227","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti64-vmnoti64op","title":"<code>vm.not.i64</code> (VM::NotI64Op)","text":"<p>Integer binary not operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.not.i64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_247","title":"Operands:","text":"Operand Description 
<code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_228","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori32-vmori32op","title":"<code>vm.or.i32</code> (VM::OrI32Op)","text":"<p>Integer binary or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.or.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_248","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_229","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori64-vmori64op","title":"<code>vm.or.i64</code> (VM::OrI64Op)","text":"<p>Integer binary or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.or.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_249","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_230","title":"Results:","text":"Result Description <code>result</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmxori32-vmxori32op","title":"<code>vm.xor.i32</code> (VM::XorI32Op)","text":"<p>Integer binary exclusive-or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.xor.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_250","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_231","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmxori64-vmxori64op","title":"<code>vm.xor.i64</code> (VM::XorI64Op)","text":"<p>Integer binary exclusive-or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.xor.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_251","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_232","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#list-ops","title":"List 
ops","text":""},{"location":"reference/mlir-dialects/VM/#vmlistalloc-vmlistallocop","title":"<code>vm.list.alloc</code> (VM::ListAllocOp)","text":"<p>Allocates a new empty list</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.alloc` operands attr-dict `:` `(` type($initial_capacity) `)` `-&gt;` type($result)\n</code></pre> <p>Allocates a new typed list with a minimum initial_capacity.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_252","title":"Operands:","text":"Operand Description <code>initial_capacity</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_233","title":"Results:","text":"Result Description <code>result</code> list"},{"location":"reference/mlir-dialects/VM/#vmlistgetf32-vmlistgetf32op","title":"<code>vm.list.get.f32</code> (VM::ListGetF32Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.f32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_253","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_234","title":"Results:","text":"Result Description <code>result</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmlistgetf64-vmlistgetf64op","title":"<code>vm.list.get.f64</code> (VM::ListGetF64Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.f64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_254","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_235","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistgeti32-vmlistgeti32op","title":"<code>vm.list.get.i32</code> (VM::ListGetI32Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.i32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_255","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#results_236","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgeti64-vmlistgeti64op","title":"<code>vm.list.get.i64</code> (VM::ListGetI64Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.i64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_256","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_237","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgetref-vmlistgetrefop","title":"<code>vm.list.get.ref</code> (VM::ListGetRefOp)","text":"<p>Ref type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.ref` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the ref value of the element at the given index. 
Note that the value may be null if the element is null or the type does not match.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_257","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_238","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmlistreserve-vmlistreserveop","title":"<code>vm.list.reserve</code> (VM::ListReserveOp)","text":"<p>Reserves capacity for list growth</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.reserve` operands attr-dict `:` `(` type($list) `,` type($minimum_capacity) `)`\n</code></pre> <p>Reserves storage for at least minimum_capacity elements. 
If the list already has at least the specified capacity the operation is ignored.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_258","title":"Operands:","text":"Operand Description <code>list</code> list <code>minimum_capacity</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistresize-vmlistresizeop","title":"<code>vm.list.resize</code> (VM::ListResizeOp)","text":"<p>Resizes the list to a new count in elements</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.resize` operands attr-dict `:` `(` type($list) `,` type($new_size) `)`\n</code></pre> <p>Resizes the list to contain new_size elements. 
This will either truncate the list if the existing size is greater than new_size or extend the list with the default list value of 0 if storing primitives and null if refs.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_259","title":"Operands:","text":"Operand Description <code>list</code> list <code>new_size</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetf32-vmlistsetf32op","title":"<code>vm.list.set.f32</code> (VM::ListSetF32Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.f32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_260","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistsetf64-vmlistsetf64op","title":"<code>vm.list.set.f64</code> (VM::ListSetF64Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.f64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: 
<code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_261","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistseti32-vmlistseti32op","title":"<code>vm.list.set.i32</code> (VM::ListSetI32Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.i32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_262","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistseti64-vmlistseti64op","title":"<code>vm.list.set.i64</code> (VM::ListSetI64Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.i64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on 
::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_263","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetref-vmlistsetrefop","title":"<code>vm.list.set.ref</code> (VM::ListSetRefOp)","text":"<p>Ref type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.ref` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new ref value (possibly null).</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_264","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> 32-bit signless integer <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmlistsize-vmlistsizeop","title":"<code>vm.list.size</code> (VM::ListSizeOp)","text":"<p>The size of the list in elements</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.size` operands attr-dict `:` `(` type($list) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the current size of the list in elements.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_265","title":"Operands:","text":"Operand Description <code>list</code> list"},{"location":"reference/mlir-dialects/VM/#results_239","title":"Results:","text":"Result 
Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#ref-comparison-ops","title":"Ref comparison ops","text":"<p>Comparison ops for <code>vm.ref</code>.</p>"},{"location":"reference/mlir-dialects/VM/#vmcmpeqref-vmcmpeqrefop","title":"<code>vm.cmp.eq.ref</code> (VM::CmpEQRefOp)","text":"<p>Ref equality comparison operation <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.ref` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_266","title":"Operands:","text":"Operand Description <code>lhs</code> ref <code>rhs</code> ref"},{"location":"reference/mlir-dialects/VM/#results_240","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpneref-vmcmpnerefop","title":"<code>vm.cmp.ne.ref</code> (VM::CmpNERefOp)","text":"<p>Ref inequality comparison operation <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.ref` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_267","title":"Operands:","text":"Operand Description <code>lhs</code> ref <code>rhs</code> 
ref"},{"location":"reference/mlir-dialects/VM/#results_241","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzref-vmcmpnzrefop","title":"<code>vm.cmp.nz.ref</code> (VM::CmpNZRefOp)","text":"<p>Ref non-zero comparison operation <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.ref` $operand attr-dict `:` type($operand)\n</code></pre> <p>Compares the given ref operand for a non-zero/null value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_268","title":"Operands:","text":"Operand Description <code>operand</code> ref"},{"location":"reference/mlir-dialects/VM/#results_242","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/VM/#vmexport-vmexportop","title":"<code>vm.export</code> (VM::ExportOp)","text":"<p>Exports a function from the module</p> <p>Specifies an exported function with an externally-visible alias. 
Multiple exports can reference the same internal functions.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_ref</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>export_name</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmfunc-vmfuncop","title":"<code>vm.func</code> (VM::FuncOp)","text":"<p>Function defined with VM control flow ops</p> <p>Represents a function containing VM ops and those of compatible dialects. All flow control is performed by VM ops.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>ordinal</code>::mlir::IntegerAttrordinal value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmimport-vmimportop","title":"<code>vm.import</code> (VM::ImportOp)","text":"<p>Imports a function from an external module</p> <p>Specifies a function that should be imported from either the runtime or an external VM module.</p> <p>Required imports can be declared with a minimum version of the module that contains the import. 
The maximum declared minimum version of all required imports from the module will become the required minimum version at runtime.</p> <p>Optional imports not present at runtime will be invalid to call and whether they were resolved can be queried with <code>vm.import.resolved</code>.</p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrordinal value <code>is_optional</code>::mlir::UnitAttrunit attribute <code>minimum_version</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vminitializer-vminitializerop","title":"<code>vm.initializer</code> (VM::InitializerOp)","text":"<p>Global initialization function</p> <p>A function that is called in definition order upon module initialization. 
Must not load any globals that are defined or initialized after it in the module.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>Util_InitializerOpInterface</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmmodule-vmmoduleop","title":"<code>vm.module</code> (VM::ModuleOp)","text":"<p>Module containing VM functions and variables</p> <p>Syntax:</p> <pre><code>operation ::= `vm.module` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>Top-level container for VM functions.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::VM::ModuleTerminatorOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>ordinal_counts</code>::mlir::iree_compiler::IREE::VM::OrdinalCountsAttr <code>version</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vmmodule_terminator-vmmoduleterminatorop","title":"<code>vm.module_terminator</code> (VM::ModuleTerminatorOp)","text":"<p>Terminator pseudo-op for the module op</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.module_terminator` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>Terminator</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_46","title":"Attributes","text":""},{"location":"reference/mlir-dialects/VM/#ordinalcountsattr","title":"OrdinalCountsAttr","text":"<p>Syntax:</p> <pre><code>#vm.ordinal_counts&lt;\n  int32_t,   # import_funcs\n  int32_t,   # export_funcs\n  int32_t,   # internal_funcs\n  int32_t,   # global_bytes\n  int32_t,   # global_refs\n  int32_t,   # rodatas\n  int32_t   # rwdatas\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/VM/#parameters","title":"Parameters:","text":"Parameter C++ type Description import_funcs <code>int32_t</code> export_funcs <code>int32_t</code> internal_funcs <code>int32_t</code> global_bytes <code>int32_t</code> global_refs <code>int32_t</code> rodatas <code>int32_t</code> rwdatas <code>int32_t</code>"},{"location":"reference/mlir-dialects/VMVX/","title":"VMVX","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvx-dialect","title":"'vmvx' Dialect","text":"<p>Vector extensions to the IREE VM.</p> <p>This is a reference dialect representing a simple IREE VM-based linear algebra module that is used as a library at runtime. 
The ops in this dialect map (roughly) 1:1 with the exported functions in the runtime module.</p> <p>See <code>vmvx.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'vmvx' Dialect<ul> <li>Operations<ul> <li>ABI ops<ul> <li>vmvx.binary (VMVX::BinaryOp)</li> <li>vmvx.copy (VMVX::CopyOp)</li> <li>vmvx.fill2d (VMVX::Fill2DOp)</li> <li>vmvx.unary (VMVX::UnaryOp)</li> </ul> </li> <li>Utility ops<ul> <li>vmvx.get_buffer_descriptor (VMVX::GetBufferDescriptorOp)</li> <li>vmvx.get_raw_interface_binding_buffer (VMVX::GetRawInterfaceBindingBufferOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/VMVX/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/VMVX/#abi-ops","title":"ABI ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxbinary-vmvxbinaryop","title":"<code>vmvx.binary</code> (VMVX::BinaryOp)","text":"<p>Performs a strided elementwise operation on two same-rank buffers</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.binary` `op` `` `(` $opcode `:` $element_type `)`\n              `lhs` `` `(` $lhs_buffer `offset` $lhs_offset `strides` `[` $lhs_strides `]` `:` type($lhs_buffer) `)`\n              `rhs` `` `(` $rhs_buffer `offset` $rhs_offset `strides` `[` $rhs_strides `]` `:` type($rhs_buffer) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n              `sizes` `` `(` $sizes `)`\n              attr-dict\n</code></pre> <p>Performs the operation in-place as if: <pre><code>  OUT = OP(LHS, RHS)\n</code></pre></p> <p>Where <code>OP</code> is a concrete operation name as defined in ukernel/elementwise.h</p> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>opcode</code>::mlir::StringAttrstring attribute <code>element_type</code>::mlir::TypeAttrtype attribute of 8-bit 
signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands","title":"Operands:","text":"Operand Description <code>lhs_buffer</code> a reference counted byte buffer <code>lhs_offset</code> index <code>lhs_strides</code> variadic of index <code>rhs_buffer</code> a reference counted byte buffer <code>rhs_offset</code> index <code>rhs_strides</code> variadic of index <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_strides</code> variadic of index <code>sizes</code> variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxcopy-vmvxcopyop","title":"<code>vmvx.copy</code> (VMVX::CopyOp)","text":"<p>Copy from one buffer to another</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.copy` `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n              `sizes` `` `(` $sizes `)`\n              `:` $element_type\n              attr-dict\n</code></pre> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_1","title":"Operands:","text":"Operand Description <code>in_buffer</code> a reference counted byte buffer <code>in_offset</code> index <code>in_strides</code> variadic of index <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_strides</code> variadic of index <code>sizes</code> variadic of 
index"},{"location":"reference/mlir-dialects/VMVX/#vmvxfill2d-vmvxfill2dop","title":"<code>vmvx.fill2d</code> (VMVX::Fill2DOp)","text":"<p>Fill a tile with a scalar</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.fill2d` `scalar` `` `(` $scalar `:` type($scalar) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `row_stride` $out_row_stride `:` type($out_buffer) `)`\n              `sizes` `` `(` $m `,` $n `)`\n              attr-dict\n</code></pre> <p>Fills a tile with dimensions [m, n] with a scalar.</p>"},{"location":"reference/mlir-dialects/VMVX/#operands_2","title":"Operands:","text":"Operand Description <code>scalar</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_row_stride</code> index <code>m</code> index <code>n</code> index"},{"location":"reference/mlir-dialects/VMVX/#vmvxunary-vmvxunaryop","title":"<code>vmvx.unary</code> (VMVX::UnaryOp)","text":"<p>Performs a strided elementwise unary operation</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.unary` `op` `` `(` $opcode `:` $element_type `)`\n              `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n              `sizes` `` `(` $sizes `)`\n              attr-dict\n</code></pre> <p>Performs the operation in-place as if: <pre><code>  OUT = OP(IN)\n</code></pre></p> <p>Where <code>OP</code> is a concrete operation name as defined in ukernel/elementwise.h</p> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>opcode</code>::mlir::StringAttrstring attribute <code>element_type</code>::mlir::TypeAttrtype attribute 
of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_3","title":"Operands:","text":"Operand Description <code>in_buffer</code> a reference counted byte buffer <code>in_offset</code> index <code>in_strides</code> variadic of index <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_strides</code> variadic of index <code>sizes</code> variadic of index"},{"location":"reference/mlir-dialects/VMVX/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxget_buffer_descriptor-vmvxgetbufferdescriptorop","title":"<code>vmvx.get_buffer_descriptor</code> (VMVX::GetBufferDescriptorOp)","text":"<p>Late binds a base buffer/offset/strides</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.get_buffer_descriptor` $source `:` type($source) `-&gt;` type(results) attr-dict\n</code></pre> <p>Queries a base buffer, offset and strides. This op is late bound to its source (alloca, binding, etc), allowing additional layers of transformations to be added as lowering progresses (or for buffers to be combined).</p> <p>This op has canonicalization rules which will bubble it up through the view stack. 
A final reconciliation pass is used explicitly to bind it to concrete sources.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameVariadicResultSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VMVX/#operands_4","title":"Operands:","text":"Operand Description <code>source</code> memref of any type values"},{"location":"reference/mlir-dialects/VMVX/#results","title":"Results:","text":"Result Description <code>base_buffer</code> a reference counted byte buffer <code>offset</code> index <code>sizes</code> variadic of index <code>strides</code> variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxget_raw_interface_binding_buffer-vmvxgetrawinterfacebindingbufferop","title":"<code>vmvx.get_raw_interface_binding_buffer</code> (VMVX::GetRawInterfaceBindingBufferOp)","text":"<p>Gets the raw buffer associated with a binding</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.get_raw_interface_binding_buffer` `set` `(` $set `)` `binding` `(` $binding `)` attr-dict\n</code></pre> <p>Normally, a slice of a binding buffer is returned via hal.interface.binding.subspan. 
However, the normal VMVX lowering flow for this presumes that the result is a memref, and upon final conversion, it will offset the memref automatically to make it consistent.</p> <p>This op is used in situations where earlier in a lowering, we have fully resolved the binding to a buffer and would just like the raw backing buffer as passed to the interface.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>set</code>::mlir::IntegerAttrindex attribute <code>binding</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/VMVX/#results_1","title":"Results:","text":"Result Description <code>buffer</code> a reference counted byte buffer"},{"location":"community/blog/archive/2024/","title":"2024","text":""},{"location":"community/blog/archive/2021/","title":"2021","text":""},{"location":"community/blog/category/performance/","title":"Performance","text":""},{"location":"community/blog/category/platforms/","title":"Platforms","text":""},{"location":"community/blog/category/frontends/","title":"Frontends","text":""},{"location":"community/tags/","title":"Tags","text":"<p>Website pages sorted by tag:</p>"},{"location":"community/tags/#android","title":"Android","text":"<ul> <li>Android cross-compilation</li> <li>Android LLDB debugging</li> </ul>"},{"location":"community/tags/#cpu","title":"CPU","text":"<ul> <li>RISC-V cross-compilation</li> <li>IREE / MLIR / Linalg tutorial</li> <li>Exploring CPU microkernels on a matmul example</li> <li>Matrix Multiplication with MMT4D</li> <li>Profiling CPUs</li> <li>CPU - Bare-Metal</li> <li>CPU</li> </ul>"},{"location":"community/tags/#cuda","title":"CUDA","text":"<ul> <li>CUDA backend</li> <li>GPU debugging 
playbook</li> <li>CUDA HAL driver</li> <li>GPU - CUDA</li> </ul>"},{"location":"community/tags/#gpu","title":"GPU","text":"<ul> <li>CUDA backend</li> <li>Vulkan environment setup</li> <li>GPU debugging playbook</li> <li>CUDA HAL driver</li> <li>HIP HAL driver</li> <li>Metal HAL driver</li> <li>Profiling GPUs using Vulkan</li> <li>GPU - CUDA</li> <li>GPU - Metal</li> <li>GPU - ROCm</li> <li>GPU - Vulkan</li> </ul>"},{"location":"community/tags/#hip","title":"HIP","text":"<ul> <li>HIP HAL driver</li> </ul>"},{"location":"community/tags/#jax","title":"JAX","text":"<ul> <li>JAX</li> <li>Extensions</li> <li>Glossary</li> </ul>"},{"location":"community/tags/#metal","title":"Metal","text":"<ul> <li>GPU debugging playbook</li> <li>Metal HAL driver</li> </ul>"},{"location":"community/tags/#onnx","title":"ONNX","text":"<ul> <li>ONNX</li> </ul>"},{"location":"community/tags/#pytorch","title":"PyTorch","text":"<ul> <li>ONNX</li> <li>PyTorch</li> <li>Extensions</li> <li>Glossary</li> </ul>"},{"location":"community/tags/#python","title":"Python","text":"<ul> <li>JAX</li> <li>ONNX</li> <li>PyTorch</li> <li>TensorFlow</li> <li>TensorFlow Lite</li> <li>Python</li> </ul>"},{"location":"community/tags/#rocm","title":"ROCm","text":"<ul> <li>GPU debugging playbook</li> </ul>"},{"location":"community/tags/#tensorflow","title":"TensorFlow","text":"<ul> <li>TFLite support via TOSA</li> <li>TensorFlow</li> <li>TensorFlow Lite</li> <li>Extensions</li> <li>Glossary</li> </ul>"},{"location":"community/tags/#vulkan","title":"Vulkan","text":"<ul> <li>Vulkan environment setup</li> <li>GPU debugging playbook</li> <li>Profiling GPUs using Vulkan</li> <li>GPU - Vulkan</li> </ul>"},{"location":"community/tags/#web","title":"Web","text":"<ul> <li>Building with Emscripten</li> </ul>"},{"location":"community/tags/#ios","title":"iOS","text":"<ul> <li>iOS cross-compilation</li> <li>GPU - Metal</li> </ul>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"IREE","text":"<p>IREE (Intermediate Representation Execution Environment<sup>1</sup>) is an MLIR-based end-to-end compiler and runtime that lowers Machine Learning (ML) models to a unified IR that scales up to meet the needs of the datacenter and down to satisfy the constraints and special considerations of mobile and edge deployments.</p>"},{"location":"#key-features","title":"Key features","text":"<ul> <li> <p> Ahead-of-time compilation</p> <p>Scheduling and execution logic are compiled together</p> <p> Project architecture</p> </li> <li> <p> Support for advanced model features</p> <p>Dynamic shapes, flow control, streaming, and more</p> <p> Importing from ML frameworks</p> </li> <li> <p> Designed for CPUs, GPUs, and other accelerators</p> <p>First class support for many popular devices and APIs</p> <p> Deployment configurations</p> </li> <li> <p> Low overhead, pipelined execution</p> <p>Efficient power and resource usage on server and edge devices</p> <p> Benchmarking</p> </li> <li> <p> Binary size as low as 30KB on embedded systems</p> <p> Running on bare-metal</p> </li> <li> <p> Debugging and profiling support</p> <p> Profiling with Tracy</p> </li> </ul>"},{"location":"#support-matrix","title":"Support matrix","text":"<p>IREE supports importing from a variety of ML frameworks:</p> <ul> <li> JAX</li> <li> ONNX</li> <li> PyTorch</li> <li> TensorFlow and TensorFlow Lite</li> </ul> <p>The IREE compiler tools run on  Linux,  Windows, and  macOS and can generate efficient code for a variety of runtime platforms:</p> <ul> <li> Linux</li> <li> Windows</li> <li> macOS</li> <li> Android</li> <li> iOS</li> <li> Bare metal</li> <li> WebAssembly (experimental)</li> </ul> <p>and architectures:</p> <ul> <li> ARM</li> <li> x86</li> <li> RISC-V</li> </ul> <p>Support for hardware accelerators and APIs is also included:</p> <ul> <li> Vulkan</li> <li> CUDA</li> <li> 
ROCm</li> <li> Metal (for Apple silicon devices)</li> <li> AMD AIE (experimental)</li> <li> WebGPU (experimental)</li> </ul>"},{"location":"#project-architecture","title":"Project architecture","text":"<p>IREE adopts a holistic approach towards ML model compilation: the IR produced contains both the scheduling logic, required to communicate data dependencies to low-level parallel pipelined hardware/API like Vulkan, and the execution logic, encoding dense computation on the hardware in the form of hardware/API-specific binaries like SPIR-V.</p> <p> </p>"},{"location":"#workflow-overview","title":"Workflow overview","text":"<p>Using IREE involves the following general steps:</p> <ol> <li> <p>Import your model</p> <p>Develop your program using one of the supported frameworks, then import into IREE</p> </li> <li> <p>Select your deployment configuration</p> <p>Identify your target platform, accelerator(s), and other constraints</p> </li> <li> <p>Compile your model</p> <p>Compile through IREE, picking settings based on your deployment configuration</p> </li> <li> <p>Run your model</p> <p>Use IREE's runtime components to execute your compiled model</p> </li> </ol>"},{"location":"#importing-models-from-ml-frameworks","title":"Importing models from ML frameworks","text":"<p>IREE supports importing models from a growing list of ML frameworks and model formats:</p> <ul> <li> JAX</li> <li> ONNX</li> <li> PyTorch</li> <li> TensorFlow and    TensorFlow Lite</li> </ul>"},{"location":"#selecting-deployment-configurations","title":"Selecting deployment configurations","text":"<p>IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE for dynamic model deployments taking advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.</p> <ul> <li>What platforms are you targeting? Desktop? Mobile? 
An embedded system?</li> <li>What hardware should the bulk of your model run on? CPU? GPU?</li> <li>How fixed is your model itself? Can the weights be changed? Do you want   to support loading different model architectures dynamically?</li> </ul> <p>IREE supports the full set of these configurations using the same underlying technology.</p>"},{"location":"#compiling-models","title":"Compiling models","text":"<p>Model compilation is performed ahead-of-time on a host machine for any combination of targets. The compilation process converts from layers and operators used by high level frameworks down into optimized native code and associated scheduling logic.</p> <p>For example, compiling for GPU execution using Vulkan generates SPIR-V kernels and Vulkan API calls. For CPU execution, native code with static or dynamic linkage and the associated function calls are generated.</p>"},{"location":"#running-models","title":"Running models","text":"<p>IREE offers a low level C API, as well as several sets of API bindings for compiling and running programs using various languages.</p>"},{"location":"#communication-channels","title":"Communication channels","text":"<ul> <li> GitHub issues: Feature requests,   bugs, and other work tracking</li> <li> IREE Discord server: Daily development   discussions with the core team and collaborators</li> <li> iree-discuss email list:   Announcements, general and low-priority discussion</li> </ul>"},{"location":"#roadmap","title":"Roadmap","text":"<p>IREE is in the early stages of development and is not yet ready for broad adoption. 
We use both GitHub Projects and GitHub Milestones to track progress.</p> <ol> <li> <p>Pronounced \"eerie\" and often styled with the   emoji\u00a0\u21a9</p> </li> </ol>"},{"location":"building-from-source/","title":"Building from source","text":"<p>While IREE does offer binary distributions for its compiler tools and Python bindings, building from source is still useful when using IREE's runtime or when making changes to the compiler or import tools themselves.</p>"},{"location":"building-from-source/#reference-pages","title":"Reference pages","text":"<ul> <li>Getting started</li> <li>Android cross-compilation</li> <li>iOS cross-compilation</li> <li>RISC-V cross-compilation</li> </ul>"},{"location":"building-from-source/android/","title":"Android cross-compilation","text":"<p>Running on a platform like Android involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific Android version and system architecture):</p> <ul> <li>IREE's compiler is built on the host and is used there to generate modules   for the target</li> <li>IREE's runtime is built on the host for the target. The runtime is then   either pushed to the target to run natively or is bundled into an Android   APK</li> </ul>","tags":["Android"]},{"location":"building-from-source/android/#prerequisites","title":"Prerequisites","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-environment-setup","title":"Host environment setup","text":"<p>You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.</p>","tags":["Android"]},{"location":"building-from-source/android/#install-android-ndk-and-adb","title":"Install Android NDK and ADB","text":"<p>The Android Native Developer Kit (NDK) is needed to use native C/C++ code on Android. 
You can download it here, or, if you have installed Android Studio, you can follow this guide instead.</p> <p>Note</p> <p>Make sure the <code>ANDROID_NDK</code> environment variable is set after installing the NDK.</p> <p>ADB (the Android Debug Bridge) is also needed to communicate with Android devices from the command line. Install it following the official user guide.</p>","tags":["Android"]},{"location":"building-from-source/android/#configure-and-build","title":"Configure and build","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-configuration","title":"Host configuration","text":"<p>Build and install on your host machine:</p> <pre><code>cmake -GNinja -B ../iree-build/ \\\n  -DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  .\ncmake --build ../iree-build/ --target install\n</code></pre>","tags":["Android"]},{"location":"building-from-source/android/#target-configuration","title":"Target configuration","text":"<p>Build the runtime using the Android NDK toolchain:</p>  Linux macOS Windows <pre><code>cmake -GNinja -B ../iree-build-android/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n  -DANDROID_ABI=\"arm64-v8a\" \\\n  -DANDROID_PLATFORM=\"android-29\" \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\ncmake --build ../iree-build-android/\n</code></pre> <pre><code>cmake -GNinja -B ../iree-build-android/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n  -DANDROID_ABI=\"arm64-v8a\" \\\n  -DANDROID_PLATFORM=\"android-29\" \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\ncmake --build ../iree-build-android/\n</code></pre> <pre><code>cmake -GNinja -B ../iree-build-android/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"%ANDROID_NDK%/build/cmake/android.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=\"%CD%/../iree-build/install/bin\" 
\\\n  -DANDROID_ABI=\"arm64-v8a\" \\\n  -DANDROID_PLATFORM=\"android-29\" \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\ncmake --build ../iree-build-android/\n</code></pre> <p>Note</p> <p>See the Android NDK CMake guide and Android Studio CMake guide for details on configuring CMake for Android.</p> <p>The specific <code>ANDROID_ABI</code> and <code>ANDROID_PLATFORM</code> used should match your target device.</p>","tags":["Android"]},{"location":"building-from-source/android/#running-android-tests","title":"Running Android tests","text":"<p>Make sure you enable developer options and USB debugging on your Android device and can see it when you run <code>adb devices</code>, then run all tests through ctest:</p> <pre><code># Build test dependencies\ncmake --build ../iree-build-android/ --target iree-test-deps\n\n# Ensure that your Android device is visible\nadb devices\n\n# Run tests\nctest --test-dir ../iree-build-android/ --output-on-failure\n</code></pre> <p>This will automatically upload build artifacts to the connected Android device, run the tests, then report the status back to your host machine.</p>","tags":["Android"]},{"location":"building-from-source/android/#running-tools-directly","title":"Running tools directly","text":"<p>Invoke the host compiler tools to produce a bytecode module FlatBuffer:</p> <pre><code>../iree-build/install/bin/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre> <p>Push the Android runtime tools to the device, along with any FlatBuffer files:</p> <pre><code>adb push ../iree-build-android/tools/iree-run-module /data/local/tmp/\nadb shell chmod +x /data/local/tmp/iree-run-module\nadb push /tmp/simple_abs_vmvx.vmfb /data/local/tmp/\n</code></pre> <p>Run the tool:</p> <pre><code>adb shell /data/local/tmp/iree-run-module --device=local-task \\\n  --module=/data/local/tmp/simple_abs_vmvx.vmfb \\\n  --function=abs \\\n  
--input=\"f32=-5\"\n</code></pre>","tags":["Android"]},{"location":"building-from-source/getting-started/","title":"Getting started","text":""},{"location":"building-from-source/getting-started/#prerequisites","title":"Prerequisites","text":"<p>IREE can be built from source using CMake. We also recommend the Ninja CMake generator and the clang or MSVC C/C++ compilers.</p> Note - Other CMake generators and compilers <p>IREE developers and CIs primarily use Ninja, clang, and MSVC. Other configurations (including the Makefile generator and gcc) are \"best effort\". Patches to improve support are always welcome.</p>  Linux macOS Windows <ol> <li> <p>Install a compiler/linker (typically \"clang\" and \"lld\" package)</p> </li> <li> <p>Install CMake (typically \"cmake\" package)</p> </li> <li> <p>Install Ninja (typically \"ninja-build\"    package)</p> </li> </ol> <p>On Debian/Ubuntu:</p> <pre><code>sudo apt install cmake ninja-build clang lld\n</code></pre> <ol> <li> <p>Install CMake</p> </li> <li> <p>Install Ninja</p> </li> </ol> <p>If using Homebrew:</p> <pre><code>brew install cmake ninja\n</code></pre> <ol> <li> <p>Install MSVC from Visual Studio or \"Tools for Visual Studio\" on the    official downloads page</p> </li> <li> <p>Install CMake from the    official downloads page</p> </li> <li> <p>Install Ninja from the official site</p> </li> </ol> <p>Note</p> <p>Initialize MSVC by running <code>vcvarsall.bat</code> to build on the command line. 
See the official documentation for details.</p>"},{"location":"building-from-source/getting-started/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":"<p>Use Git to clone the IREE repository and initialize its submodules:</p> <pre><code>git clone https://github.com/iree-org/iree.git\ncd iree\ngit submodule update --init\n</code></pre> <p>The most basic CMake workflow is:</p> <pre><code># Configure\ncmake -G Ninja -B ../iree-build/ .\n\n# Build\ncmake --build ../iree-build/\n</code></pre> <p>Caution - slow builds</p> <p>The compiler build is complex. You will want a powerful machine and to tune the settings following the next section. In 2023, we've seen builds take around 5-10 minutes on 64-core Linux machines.</p> <p>Use case permitting, disabling the compiler build with <code>-DIREE_BUILD_COMPILER=OFF</code> will drastically simplify the build.</p>"},{"location":"building-from-source/getting-started/#configuration-settings","title":"Configuration settings","text":"<p>The configure step should be customized for your build environment. These settings can improve compile and link times substantially.</p>  Linux macOS Windows <pre><code># Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_ASSERTIONS=ON \\\n    -DIREE_ENABLE_SPLIT_DWARF=ON \\\n    -DIREE_ENABLE_THIN_ARCHIVES=ON \\\n    -DCMAKE_C_COMPILER=clang \\\n    -DCMAKE_CXX_COMPILER=clang++ \\\n    -DIREE_ENABLE_LLD=ON\n</code></pre> <pre><code># Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . 
\\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_ASSERTIONS=ON \\\n    -DIREE_ENABLE_SPLIT_DWARF=ON \\\n    -DCMAKE_C_COMPILER=clang \\\n    -DCMAKE_CXX_COMPILER=clang++ \\\n    -DIREE_ENABLE_LLD=ON\n</code></pre> <p>It is also possible to add <code>-DIREE_ENABLE_THIN_ARCHIVES=ON</code> if the <code>CMAKE_AR</code> variable is defined and points to the path of either the GNU binutils or LLVM <code>ar</code> program, overriding the default Apple <code>ar</code>.</p> <pre><code># Recommended development options:\ncmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_ASSERTIONS=ON\n</code></pre> Tip - CMAKE_BUILD_TYPE values <p>We recommend using the <code>RelWithDebInfo</code> build type by default for a good balance of debug info and performance. The <code>Debug</code>, <code>Release</code>, and <code>MinSizeRel</code> build types are useful in more specific cases. Note that several useful LLVM debugging features are only available in <code>Debug</code> builds. See the official CMake documentation for general details.</p> Tip - Faster recompilation with ccache <p>We recommend using <code>ccache</code> with CMake, especially when rebuilding the compiler. 
To use it, configure CMake with:</p> <pre><code>-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache\n</code></pre> <p>See also our developer documentation for ccache.</p>"},{"location":"building-from-source/getting-started/#optional-components","title":"Optional components","text":"<p>By default, the CMake build includes:</p> <ul> <li>All compiler targets (<code>llvm-cpu</code>, <code>cuda</code>, <code>vulkan-spirv</code>, etc.)</li> <li>All runtime HAL drivers (<code>local-task</code>, <code>cuda</code>, <code>vulkan</code>, etc.)</li> <li>All compiler input formats (StableHLO, TOSA, etc.)</li> <li>All compiler output formats (VM bytecode, C)</li> </ul> <p>The default build does not include:</p> <ul> <li>Compiler or runtime bindings (Python, TFLite, etc.)</li> <li>Advanced features like AddressSanitizer or tracing instrumentation</li> <li>Experimental components</li> </ul> <p>These can be changed via the <code>IREE_</code> CMake options listed in the root <code>CMakeLists.txt</code>.</p>"},{"location":"building-from-source/getting-started/#extensions-and-integrations","title":"Extensions and integrations","text":"<p>When using IREE within other projects, you can register compiler plugins and runtime HAL drivers. You can also bring your own copy of LLVM and some other tools. See the root <code>CMakeLists.txt</code> for details.</p>"},{"location":"building-from-source/getting-started/#tests-and-samples","title":"Tests and samples","text":""},{"location":"building-from-source/getting-started/#running-tests","title":"Running tests","text":"<p>Tests are run via ctest. 
To build and run the core project tests:</p> <pre><code># Build default targets\ncmake --build ../iree-build/\n\n# Run tests\nctest --test-dir ../iree-build/\n</code></pre> <p>Caution</p> <p>This has two limitations:</p> <ol> <li>Large tests are excluded from the build by default</li> <li>Some tests require hardware like a GPU and will fail on unsupported systems</li> </ol> <p>To build and then run all tests:</p> <pre><code># 1. Build default targets\ncmake --build ../iree-build/\n\n# 2. Build test dependencies\ncmake --build ../iree-build/ --target iree-test-deps\n\n# 3. Run tests\nctest --test-dir ../iree-build/\n\n\n# Or combine all steps using a utility target\ncmake --build ../iree-build --target iree-run-tests\n</code></pre> <p>To run only certain tests, we have a helper script that converts environment variables into ctest filters:</p> <pre><code># Run default tests\n./build_tools/cmake/ctest_all.sh ../iree-build\n\n# Run tests, turning CUDA on and Vulkan off\nexport IREE_CUDA_DISABLE=0\nexport IREE_VULKAN_DISABLE=1\n./build_tools/cmake/ctest_all.sh ../iree-build\n</code></pre>"},{"location":"building-from-source/getting-started/#running-samples","title":"Running samples","text":"<pre><code># Build\ncmake --build ../iree-build/\n\n# Run a standalone sample application\n../iree-build/runtime/src/iree/runtime/demo/hello_world_embedded\n# 4xf32=1 1.1 1.2 1.3\n#  *\n# 4xf32=10 100 1000 10000\n#  =\n# 4xf32=10 110 1200 13000\n\n# Try out the developer tools\nls ../iree-build/tools/\n../iree-build/tools/iree-compile --help\n../iree-build/tools/iree-run-module --help\n</code></pre>"},{"location":"building-from-source/getting-started/#python-bindings","title":"Python bindings","text":"<p>Python packages can either be built from source or installed from our releases. 
See the Python bindings page for details about the bindings themselves.</p>"},{"location":"building-from-source/getting-started/#dependencies","title":"Dependencies","text":"<p>You will need a recent Python installation &gt;=3.9 (we aim to support non-eol Python versions).</p> Tip - Managing Python versions <p>Make sure your 'python' is what you expect:</p>  Linux macOS Windows <p>Note that on multi-python systems, this may have a version suffix, and on many Linuxes where python2 and python3 can co-exist, you may also want to use <code>python3</code>.</p> <pre><code>which python\npython --version\n</code></pre> <p>Note that on multi-python systems, this may have a version suffix, and on macOS where python2 and python3 can co-exist, you may also want to use <code>python3</code>.</p> <pre><code>which python\npython --version\n</code></pre> <p>The Python launcher for Windows (<code>py</code>) can help manage versions.</p> <pre><code>which python\npython --version\npy --list-paths\n</code></pre> Tip - Virtual environments <p>We recommend using virtual environments to manage python packages, such as through <code>venv</code> (about, tutorial):</p>  Linux macOS Windows <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\n.venv\\Scripts\\activate.bat\n</code></pre> <p>When done, run <code>deactivate</code>.</p> <pre><code># Upgrade PIP before installing other requirements\npython -m pip install --upgrade pip\n\n# Install IREE build requirements\npython -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre>"},{"location":"building-from-source/getting-started/#building-with-cmake","title":"Building with CMake","text":"<p>To build the Python bindings, configure CMake with the <code>IREE_BUILD_PYTHON_BINDINGS</code> option. 
We also recommend explicitly setting which Python executable to use with <code>Python3_EXECUTABLE</code>:</p> <pre><code># Configure (including other options as discussed above)\ncmake -G Ninja -B ../iree-build/ \\\n  -DIREE_BUILD_PYTHON_BINDINGS=ON  \\\n  -DPython3_EXECUTABLE=\"$(which python)\" \\\n  .\n\n# Build\ncmake --build ../iree-build/\n</code></pre>"},{"location":"building-from-source/getting-started/#using-the-python-bindings","title":"Using the Python bindings","text":"<p>Extend your <code>PYTHONPATH</code> with IREE's <code>bindings/python</code> paths and try importing:</p>  Linux macOS Windows <pre><code>source ../iree-build/.env &amp;&amp; export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n#   iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n</code></pre> <pre><code>source ../iree-build/.env &amp;&amp; export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n#   iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n</code></pre> <pre><code>..\\iree-build\\.env.ps1  # or ..\\iree-build\\.env.bat\n# The 'PYTHONPATH' environment variable should now contain\n#   iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n</code></pre> <p>Using IREE's ML framework importers requires a few extra steps:</p> <pre><code># Install test requirements\npython -m pip install -r integrations/tensorflow/test/requirements.txt\n\n# Install pure Python packages (no build required)\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n\n# 
Then test the tools:\niree-import-tf --help\niree-import-tflite --help\n</code></pre>"},{"location":"building-from-source/ios/","title":"iOS cross-compilation","text":"<p>Cross-compilation for iOS consists of the two steps below.</p> <ul> <li>On the macOS host, build the IREE compiler.  We can run it to create   IREE modules.</li> <li>Build the IREE runtime on the macOS host for iOS devices and the   simulator.  We can then run the IREE module on the simulator.</li> </ul>","tags":["iOS"]},{"location":"building-from-source/ios/#prerequisites","title":"Prerequisites","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#install-xcode-and-ios-sdk","title":"Install Xcode and iOS SDK","text":"<p>For cross-compilation, you need Xcode. It comes with the SDKs for iOS devices and the simulator, as well as the <code>simctl</code> tool for controlling the simulator from the command line.</p>","tags":["iOS"]},{"location":"building-from-source/ios/#host-environment-setup","title":"Host environment setup","text":"<p>On your host platform, you should already be able to build IREE from source.  Please make sure you've gone through the steps in getting started.</p>","tags":["iOS"]},{"location":"building-from-source/ios/#configure-and-build","title":"Configure and build","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#build-the-iree-compiler-for-the-host","title":"Build the IREE compiler for the Host","text":"<p>Build and install on your macOS host:</p> <pre><code>cmake -S . -B ../iree-build/ -GNinja \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  -DCMAKE_INSTALL_PREFIX=../iree-build/install\n\ncmake --build ../iree-build/ --target install\n</code></pre>","tags":["iOS"]},{"location":"building-from-source/ios/#cross-compile-the-iree-runtime-for-ios","title":"Cross-compile the IREE runtime for iOS","text":"<p>Build the runtime for the iOS Simulator.</p> <pre><code>cmake -S . 
-B ../build-ios-sim -GNinja \\\n  -DCMAKE_SYSTEM_NAME=iOS \\\n  -DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphonesimulator Path) \\\n  -DCMAKE_OSX_ARCHITECTURES=arm64 \\\n  -DCMAKE_SYSTEM_PROCESSOR=arm64 \\\n  -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 \\\n  -DCMAKE_IOS_INSTALL_COMBINED=YES \\\n  -DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n  -DCMAKE_INSTALL_PREFIX=../build-ios-sim/install \\\n  -DIREE_BUILD_COMPILER=OFF\n\ncmake --build ../build-ios-sim --config Release --target install\n</code></pre> <p>Or, we can build the runtime for iOS devices by changing the value of the <code>-DCMAKE_OSX_SYSROOT</code> option to:</p> <pre><code>  -DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphoneos Path)\n</code></pre>","tags":["iOS"]},{"location":"building-from-source/ios/#running-iree-modules-on-the-ios-simulator","title":"Running IREE modules on the iOS Simulator","text":"<p>Run the IREE compiler on the host to generate a module.</p> <pre><code>../iree-build/install/bin/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre> <p>We could test the generated module by running the macOS version of <code>iree-run-module</code> on the host.</p> <pre><code>../iree-build/install/bin/iree-run-module \\\n  --module=/tmp/simple_abs_vmvx.vmfb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=\"f32=-5\"\n</code></pre> <p>To run it on the iOS simulator, we need to copy the vmfb file into the <code>iree-run-module</code> iOS app bundle.</p> <pre><code>cp /tmp/simple_abs_vmvx.vmfb \\\n   ../build-ios-sim/install/bin/iree-run-module.app/\n</code></pre> <p>Open the iOS Simulator Manager on the host.</p> <pre><code>open -a Simulator\n</code></pre> <p>After creating and booting a simulator in this app, you can list it from the command-line.</p> <pre><code>xcrun simctl list devices | grep Booted\n</code></pre> <p>This is what should come out of the command:</p> <pre><code>    iPhone 
14 Pro (12341234-ABCD-ABCD-ABCD-123412341234) (Booted)\n</code></pre> <p>where <code>iPhone 14 Pro</code> is the device being simulated and <code>12341234-ABCD-ABCD-ABCD-123412341234</code> is the simulator's unique device ID (UDID).</p> <p>Install the app <code>iree-run-module</code> on the simulator, given its UDID.</p> <pre><code>xcrun simctl install &lt;UDID&gt; ../build-ios-sim/install/bin/iree-run-module.app\n</code></pre> <p>Check the path to the installed bundle, where the <code>simple_abs_vmvx.vmfb</code> module should be found.</p> <pre><code>ls $(xcrun simctl get_app_container &lt;UDID&gt; dev.iree.iree-run-module)\n</code></pre> <p>The string <code>dev.iree.iree-run-module</code> is the bundle identifier of the iOS app.  The CMake building process generates it and saves it in the property list (plist) file <code>../build-ios-sim/install/bin/iree-run-module.app/Info.plist</code>.</p> <p>Launch the <code>iree-run-module</code> app on the simulator to run the IREE module <code>simple_abs_vmvx.vmfb</code>.</p> <pre><code>xcrun simctl launch --console \\\n  &lt;UDID&gt; \\\n  dev.iree.runmodule \\\n  --device=local-task \\\n  --function=abs \\\n  --input=\"f32=-5\" \\\n  --module=$(xcrun simctl get_app_container &lt;UDID&gt; dev.iree.iree-run-module)/simple_abs_vmvx.vmfb\n</code></pre>","tags":["iOS"]},{"location":"building-from-source/riscv/","title":"RISC-V cross-compilation","text":"<p>Running on a platform like RISC-V involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific RISC-V CPU architecture and operating system):</p> <ul> <li>IREE's compiler is built on the host and is used there to generate modules   for the target</li> <li>IREE's runtime is built on the host for the target. 
The runtime is then   pushed to the target to run natively.</li> </ul>","tags":["CPU"]},{"location":"building-from-source/riscv/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-environment-setup","title":"Host environment setup","text":"<p>You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#install-risc-v-cross-compile-toolchain-and-emulator","title":"Install RISC-V cross-compile toolchain and emulator","text":"<p>You'll need a RISC-V LLVM compilation toolchain and a RISC-V enabled QEMU emulator.</p> <p>See instructions in the following links:</p> <ul> <li>Clang getting started</li> <li>RISC-V GNU toolchain</li> <li>QEMU</li> <li>RISC-V Linux QEMU</li> </ul> <p>Note</p> <p>The <code>RISCV_TOOLCHAIN_ROOT</code> environment variable needs to be set to the root directory of the installed GNU toolchain when building the RISC-V compiler target and the runtime library.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#install-prebuilt-risc-v-tools-risc-v-64-bit-linux-toolchain","title":"Install prebuilt RISC-V tools (RISC-V 64-bit Linux toolchain)","text":"<p>Execute the following script to download the prebuilt RISC-V toolchain and QEMU from the IREE root directory:</p> <pre><code>./build_tools/riscv/riscv_bootstrap.sh\n</code></pre> <p>Note</p> <p>The prebuilt toolchain is built with an AlmaLinux release 8.8 Docker image. It requires glibc &gt;= 2.28 on your host machine.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#support-vector-extension","title":"Support vector extension","text":"<p>For RISC-V vector extensions support, see additional instructions.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#configure-and-build","title":"Configure and 
build","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-configuration","title":"Host configuration","text":"<p>Build and install on your host machine:</p> <pre><code>cmake -GNinja -B ../iree-build/ \\\n  -DCMAKE_C_COMPILER=clang \\\n  -DCMAKE_CXX_COMPILER=clang++ \\\n  -DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  .\ncmake --build ../iree-build/ --target install\n</code></pre>","tags":["CPU"]},{"location":"building-from-source/riscv/#target-configuration","title":"Target configuration","text":"<p>The following instruction shows how to build for a RISC-V 64-bit Linux machine. For other RISC-V targets, please refer to riscv.toolchain.cmake as a reference of how to set up the cmake configuration.</p>","tags":["CPU"]},{"location":"building-from-source/riscv/#risc-v-64-bit-linux-target","title":"RISC-V 64-bit Linux target","text":"<pre><code>cmake -GNinja -B ../iree-build-riscv/ \\\n  -DCMAKE_TOOLCHAIN_FILE=\"./build_tools/cmake/riscv.toolchain.cmake\" \\\n  -DIREE_HOST_BIN_DIR=$(realpath ../iree-build/install/bin) \\\n  -DRISCV_CPU=linux-riscv_64 \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  -DRISCV_TOOLCHAIN_ROOT=${RISCV_TOOLCHAIN_ROOT} \\\n  -DIREE_ENABLE_CPUINFO=OFF \\\n  .\ncmake --build ../iree-build-riscv/\n</code></pre>","tags":["CPU"]},{"location":"building-from-source/riscv/#running-iree-bytecode-modules-on-the-risc-v-system","title":"Running IREE bytecode modules on the RISC-V system","text":"<p>Note</p> <p>The following instructions are meant for the RISC-V 64-bit Linux target. For the bare-metal target, please refer to simple_embedding to see how to build a ML workload for a bare-metal machine.</p> <p>Set the path to qemu-riscv64 Linux emulator binary in the <code>QEMU_BIN</code> environment variable. 
If it is installed with <code>riscv_bootstrap.sh</code>, the path is default at ${HOME}/riscv/qemu/linux/RISCV/bin/qemu-riscv64.</p> <pre><code>export QEMU_BIN=&lt;path to qemu-riscv64 binary&gt;\n</code></pre> <p>Invoke the host compiler tools to produce a bytecode module FlatBuffer:</p> <pre><code>../iree-build/install/bin/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre> <p>Run the RISC-V emulation:</p> <pre><code>${QEMU_BIN} \\\n  -cpu rv64 \\\n  -L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n  ../iree-build-riscv/tools/iree-run-module \\\n  --device=local-task \\\n  --module=/tmp/simple_abs_vmvx.vmfb \\\n  --function=abs \\\n  --input=f32=-5\n</code></pre>","tags":["CPU"]},{"location":"building-from-source/riscv/#optional-configuration","title":"Optional configuration","text":"<p>RISC-V Vector extensions allows SIMD  code to run more efficiently. To enable the vector extension for the compiler  toolchain and the emulator, build the tools from the following sources:</p> <ul> <li>RISC-V toolchain is built from https://github.com/llvm/llvm-project.<ul> <li>Currently, the LLVM compiler is built on GNU toolchain, including libgcc,   GNU linker, and C libraries. You need to build GNU toolchain first.</li> <li>Clone GNU toolchain from:   https://github.com/riscv/riscv-gnu-toolchain.   
Switch the \"riscv-binutils\" submodule to   <code>git://sourceware.org/git/binutils-gdb.git</code> manually.</li> </ul> </li> <li>RISC-V QEMU is built from https://gitlab.com/qemu-project/qemu/tree/v8.1.2.</li> </ul> <p>The SIMD code can be generated following the IREE CPU flow with the additional command-line flags</p> <pre><code>tools/iree-compile \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-llvmcpu-target-triple=riscv64 \\\n  --iree-llvmcpu-target-abi=lp64d \\\n  --iree-llvmcpu-target-cpu-features=\"+m,+a,+f,+d,+zvl512b,+v\" \\\n  --riscv-v-fixed-length-vector-lmul-max=8 \\\n  iree_input.mlir -o mobilenet_cpu.vmfb\n</code></pre> <p>Then run on the RISC-V QEMU:</p> <pre><code>${QEMU_BIN} \\\n  -cpu rv64,Zve64d=true,vlen=512,elen=64,vext_spec=v1.0 \\\n  -L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n  ../iree-build-riscv/tools/iree-run-module \\\n  --device=local-task \\\n  --module=mobilenet_cpu.vmfb \\\n  --function=predict \\\n  --input=\"1x224x224x3xf32=0\"\n</code></pre>","tags":["CPU"]},{"location":"community/","title":"Community projects","text":"<p>Projects built by community members:</p> <ul> <li> <p>The SHARK and   SRT projects offer highly tuned performance   and user interfaces for running a large corpus of machine learning programs.</p> </li> <li> <p>The SHARK-Turbine project provides   tools for bridging between PyTorch and IREE.</p> </li> <li> <p>The IREE Bare-Metal Arm Sample   shows how to build IREE with the   Arm GNU Toolchain   for bare-metal Arm targets using the open-source firmware libraries   CMSIS and   libopencm3.</p> </li> <li> <p>The IREE C++ Template   shows one way to integrate IREE's runtime into a project with CMake.</p> </li> </ul> <p>Official repositories:</p> <ul> <li> <p>iree-jax is home to   IREE's AOT support for JAX programs.</p> </li> <li> <p>iree-experimental   includes various samples and prototypes built with IREE.</p> </li> <li> <p>iree-llvm-sandbox   contains experimental work by the IREE team closely related to 
LLVM and   MLIR, usually with the aim of contributing back to those upstream projects.</p> </li> </ul>"},{"location":"community/blog/","title":"Blog","text":"<p>Updates from the IREE team</p>"},{"location":"community/blog/2021-10-15-cuda-backend/","title":"CUDA backend","text":"<p>IREE is being designed with re-targetability as a core goal: it should be possible to use IREE to target a broad spectrum of power regimes, from embedded systems to distributed clusters; and it should be possible to extend IREE to target new back-ends without having to reinvent the wheel each time.</p> <p>To explore this, we recently branched out from our initial focus on low-latency mobile deployments with a goal of using IREE to target data center workloads on Nvidia CUDA. This post describes how we quickly brought up a CUDA back-end for IREE and used it to train BERT, then shares some metrics and next steps.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#bring-up","title":"Bring up","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#hal-support","title":"HAL support","text":"<p>IREE has a HAL API that abstracts all the targets behind a common interface. The first step to supporting a CUDA target was to map the HAL API onto CUDA. We use the CUDA driver API to reduce dependencies and be closer to the hardware. The HAL API is based on other GPU APIs like Vulkan and Metal, so it was a natural fit for CUDA. The HAL API exposes memory allocations, basic fill and memset commands, kernel dispatch, and general command buffer handling. The original implementation uses the CUDA graph API as a graph maps naturally to command buffers. There is also an implementation using CUDA streams for comparison.</p> <p>HAL exposes an API that can be tested independently: even if we are not able to create CUDA kernels yet, we can test a large portion of the CUDA driver using CTS tests. 
Those can be run to make sure a system has the required CUDA support.</p> <p></p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#compiler-support","title":"Compiler support","text":"<p>CUDA has an open source backend in LLVM generating PTX that we are leveraging. Therefore IREE can create NVVM (CUDA LLVM variant) and use LLVM's backend to generate PTX. The CUDA driver will do the \"last mile compilation\" at runtime to convert PTX into the GPU's native ISA.</p> <p>The IREE compiler pipeline starts from linalg with tensor operands. A large part of the compiler is independent of the target.</p> <p>The linalg on tensor representation of the graph is broken up into dispatch regions that are processed by NVVM Codegen. A simple implementation of the compiler is to run bufferization and convert linalg to standard followed by conversion to NVVM/LLVM. Most of those transformations can re-use upstream MLIR transformations and share them with any other backend targeting LLVM IR. Leveraging MLIR conversion to LLVM will allow us to quickly go from a simple \"hello world\" to supporting full models.</p> <p>IREE code generation is based on MLIR infrastructure so each step can easily be tested independently using the MLIR lit framework.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#flatbuffer-definition","title":"FlatBuffer definition","text":"<p>Kernels are encoded in a FlatBuffer containing the PTX code as well as the workgroup size to use for the dispatch. 
This allows serialization of the kernels in the IR, it is then de-serialized by the HAL layer.</p> <pre><code>table CUDAExecutableDef {\n  // A map of entry point ordinals to string names as used in the shader\n  // library.\n  entry_points:[string];\n\n  // Block sizes for each entry point.\n  block_sizes:[CUDABlockSizeDef];\n\n  // PTX string of the module.\n  ptx_image:string;\n}\n</code></pre>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#hello-world","title":"Hello world","text":"<p>Together those 3 steps are enough to provide most of the functionality and we can now successfully compile full models.</p> <p></p> <p>To reproduce running a simple op end to end through CUDA backend, save the following mlir in <code>/tmp/add.mlir</code> and then run the following given commands:</p> <pre><code>func.func @add(%arg0: tensor&lt;4xf32&gt;, %arg1: tensor&lt;4xf32&gt;) -&gt; tensor&lt;4xf32&gt; {\n  %0 = tensor.empty() : tensor&lt;4xf32&gt;\n  %1 = linalg.generic {\n    indexing_maps = [\n      affine_map&lt;(d0) -&gt; (d0)&gt;, affine_map&lt;(d0) -&gt; (d0)&gt;, affine_map&lt;(d0) -&gt; (d0)&gt;], iterator_types = [\"parallel\"]}\n      ins(%arg0, %arg1 : tensor&lt;4xf32&gt;, tensor&lt;4xf32&gt;)\n      outs(%0 : tensor&lt;4xf32&gt;) {\n  ^bb0(%in: f32, %in_0: f32, %out: f32):\n    %2 = arith.addf %in, %in_0 : f32\n    linalg.yield %2 : f32\n  } -&gt; tensor&lt;4xf32&gt;\n  return %1 : tensor&lt;4xf32&gt;\n}\n</code></pre> <pre><code># First compile into a VM bytecode module.\n$ ../iree-build/tools/iree-compile \\\n  --iree-hal-target-backends=cuda \\\n  /tmp/add.mlir \\\n  -o /tmp/add.vmfb\n\n# Run the module through CUDA HAL backend.\n$ ../iree-build/tools/iree-run-module \\\n  --device=cuda \\\n  --module=/tmp/add.vmfb \\\n  --function=add \\\n  --input=\"4xf32=[1 2 3 4]\" \\\n  --input=\"4xf32=[2 2 2 2]\"\n\nEXEC @add\n4xf32=3 4 5 
6\n</code></pre>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#performance","title":"Performance","text":"<p>Now that we have enabled functionality we need to look at the performance. Once again we can leverage existing MLIR transformations to speed up the development work.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#tiling-and-distribution","title":"Tiling and distribution","text":"<p>The first obvious step to get efficient code on CUDA is to make sure we distribute the work on enough blocks and threads to fill up the GPU. At the time of bring up not all ops were being tiled and distributed in the common IREE layer. During dispatch region creation we apply tile and fuse which will distribute the work into a set of workgroups that are mapped to CUDA blocks.</p> <p>At the beginning of the code generation we look at the dispatch region and decide on the tile size for a workgroup. For CUDA we also decide the number of threads per block. 
We will then have a pass tiling the ops in the dispatch region a second time to distribute the work onto threads within the block.</p> <p>At this stage the IR looks like the following:</p> <pre><code>    %8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -&gt; index\n    %9 = affine.apply affine_map&lt;()[s0] -&gt; (s0 * 4)&gt;()[%8]\n    %10 = memref.subview %in0[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %11 = memref.subview %in1[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %12 = memref.subview %out[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    linalg.generic {\n        indexing_maps = [affine_map&lt;(d0) -&gt; (d0)&gt;,\n                         affine_map&lt;(d0) -&gt; (d0)&gt;,\n                         affine_map&lt;(d0) -&gt; (d0)&gt;],\n        iterator_types = [\"parallel\"]}\n      ins(%10, %11 :\n          memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;,\n          memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;)\n      outs(%12 : memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;) {\n    ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):  // no predecessors\n      %13 = addf %arg1, %arg2 : f32\n      linalg.yield %13 : f32\n    }\n</code></pre>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#vectorization","title":"Vectorization","text":"<p>Even though GPUs execute most operations as scalar, memory operations are optimized to access 128 bits of data per thread. Therefore it is critical to vectorize load/store operations. After tiling to a size we vectorize the IR to get vector read/write mapping to load4/store4. 
This significantly improves the memory access pattern of the code generated.</p> <p>This converts the previous IR to:</p> <pre><code>    %8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -&gt; index\n    %9 = affine.apply affine_map&lt;()[s0] -&gt; (s0 * 4)&gt;()[%8]\n    %10 = memref.subview %in0[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %11 = memref.subview %in1[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %12 = memref.subview %out[%9] [4] [1] : memref&lt;128xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt; to memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n    %13 = vector.transfer_read %10[%c0], %cst {in_bounds = [true]} : memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;, vector&lt;4xf32&gt;\n    %14 = vector.transfer_read %11[%c0], %cst {in_bounds = [true]} : memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;, vector&lt;4xf32&gt;\n    %15 = addf %13, %14 : vector&lt;4xf32&gt;\n    vector.transfer_write %15, %12[%c0] {in_bounds = [true]} : vector&lt;4xf32&gt;, memref&lt;4xf32, affine_map&lt;(d0)[s0] -&gt; (d0 + s0)&gt;&gt;\n</code></pre>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#shared-memory-optimization","title":"Shared memory optimization","text":"<p>Nvidia GPUs have a fast shared memory that needs to be leveraged to optimize cases where we may be memory bound and have the potential to re-use memory reads.</p> <p>For operations like GEMM using shared memory gives us a significant speed up. 
We leverage memory promotion, vector distribution and software pipelining transformations from MLIR to generate efficient copies from global to shared memory that can be interleaved with the compute work.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#optimization-pipeline","title":"Optimization pipeline","text":"<p>Those different transformations compose to this flow:</p> <p></p> <p>The full dump step by step of a linalg.matmul operation can be found here.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#results-and-next-steps","title":"Results and next steps","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#gemm","title":"GEMM","text":"<p>We compare the performance of a single GEMM operation to the highly optimized cuBLAS library using the mmperf framework.</p> <p></p> <p>The graph can be reproduced by following the instructions in mmperf</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#future-work","title":"Future work","text":"<p>Nod.ai has contributed an experimental HAL module for ROCM that allows us to re-use the compiler parts to support ROCM; more support is going to be added in the future.</p> <p>Several performance improvements are still in progress, including optimizing the runtime allocator to reduce the host-side overhead and tuning tile sizes based on profiling.</p> <p>Several models are running and we will publish more detailed benchmark results in the near future.</p>","tags":["GPU","CUDA"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/","title":"IREE / MLIR / Linalg tutorial","text":"","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#introduction","title":"Introduction","text":"<p>This tutorial is simultaneously about IREE, MLIR, and specifically the MLIR Linalg 
dialect.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#what-is-mlir","title":"What is MLIR?","text":"<p>MLIR is a programming language, but MLIR in itself is almost just an empty shell. What it really provides is a framework for defining MLIR dialects, which are where the features come from.</p> <p>The \"IR\" part of the MLIR name stands for \"intermediate representation\". It means that MLIR is meant to be primarily for compiler-internal representations of code. But MLIR is actually fairly nice for humans to work with, and it's not hard to hand-author some MLIR programs from scratch. That is exactly the topic of this tutorial.</p> <p>The \"ML\" part of the MLIR name stands for \"multi-level\" (not machine learning!). It means that MLIR allows for multiple dialects to be freely mixed in the same MLIR programs. Each dialect can define operations, types and attributes, and each single MLIR statement can mix ops, types and attributes coming from different dialects.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#what-is-the-linalg-dialect","title":"What is the Linalg dialect?","text":"<p>Linalg is an MLIR dialect that essentially consists of a single op, <code>linalg.generic</code>, with most other ops in this dialect being just convenience aliases for special cases of <code>linalg.generic</code>. 
So, to describe Linalg dialect is essentially to describe <code>linalg.generic</code>.</p> <p>The point of this is that this single op, <code>linalg.generic</code>, is:</p> <ul> <li>General enough to express the entirety of usual machine learning workloads in   any quantization scheme at all.</li> <li>High-level enough to be lowered to efficient code for any target (CPU, GPU,   ...)</li> <li>Designed to be a good fit for compiler IR-to-IR transformations.</li> </ul> <p>These traits make the Linalg dialect an ideal \"middle-end\" IR for a machine learning compiler.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#what-is-iree","title":"What is IREE?","text":"<p>IREE is a MLIR compiler and runtime that can lower MLIR programs through successive, ever lower-level dialects, ultimately producing machine code for various CPU, GPU and other hardware targets. Check out the Developer overview docs and the ML frameworks docs.</p> <p>Front-ends can ingest source programs from various machine-learning frameworks into MLIR Linalg dialect. Boundaries are in flux, but it is a good enough mental model to think of anything up to Linalg as \"front-end\". One example is, for ingesting PyTorch programs, the front-end is torch-mlir and end-users are encouraged to use iree-turbine, which integrates IREE, torch-mlir and PyTorch.</p> <p>This tutorial is only concerned about the Linalg dialect, and we are going to learn to hand-author some Linalg programs. 
The point of the above tangent about front-ends is to make it clear that no matter which way you feed a program into IREE, it will internally be rewritten into a Linalg program, because that really is the intermediate representation in this compiler.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#getting-iree-binaries","title":"Getting IREE binaries","text":"<p>IREE builds can be downloaded or installed as Python packages or built from sources.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#first-linalg-programs","title":"First linalg programs","text":"<p>Before we start: there is also an official Linalg tutorial. It takes a different approach compared to the present tutorial, so the two are complementary.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#static-shape-element-wise-addition-of-two-1d-arrays","title":"Static-shape, element-wise addition of two 1D arrays","text":"<p>Here is our first Linalg function. The scalar type used in this program, <code>f32</code>, is 32-bit floating-point.</p> <p>Notice some elements of MLIR syntax:</p> <ul> <li>The <code>%</code> prefix on an identifier indicates a   SSA value, like   here <code>%result</code>.</li> <li>The <code>@</code> prefix on an identifier indicates a function, like here <code>@foo</code>.</li> <li>The <code>^</code> prefix on an identifier indicates a   block, like here <code>^bb0</code>.</li> <li>The <code>#</code> prefix on an identifier indicates an   attribute alias,   like here <code>#map_1d_identity</code>.</li> <li>The <code>x</code> letter is used as delimiter in shapes, and between the shape and the   element type, like here <code>10xf32</code> meaning a 1D shape of size 10 with element   type <code>f32</code>.</li> <li>Operations have the form <code>dialect.name</code>. 
For example, <code>tensor.empty</code> is the   <code>empty</code> operation within the <code>tensor</code> dialect, and <code>func.func</code> is the <code>func</code>   operation within the <code>func</code> dialect.</li> </ul> <pre><code>// The 1D identity map, used below.\n#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n\n// Define a function @foo taking two tensor arguments `%lhs` and `%rhs` and returning a tensor.\nfunc.func @foo(\n      %lhs : tensor&lt;10xf32&gt;,\n      %rhs : tensor&lt;10xf32&gt;\n    ) -&gt; tensor&lt;10xf32&gt; {\n  // A constant used below.\n  %c0f32 = arith.constant 0.0 : f32\n  // Create a result \"init value\". Think of it as an abstract \"allocation\",\n  // creating a tensor but not giving its elements any particular value. It would be\n  // undefined behavior to read any element from this tensor.\n  %result_empty =  tensor.empty() : tensor&lt;10xf32&gt;\n\n  // Perform the computation. The following is all a single linalg.generic op.\n\n  %result = linalg.generic {\n    // This {...} section is the \"attributes\" - some compile-time settings for this op.\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_identity\n    ],\n    // There is one tensor dimension, and it's a parallel-iteration dimension,\n    // meaning that it occurs also as a result tensor dimension. The alternative\n    // would be \"reduction\", for dimensions that do not occur in the result tensor.\n    iterator_types=[\"parallel\"]\n  } // End of the attributes for this linalg.generic. Next come the parameters:\n    // `ins` is where we pass regular input-parameters\n    ins(%lhs, %rhs : tensor&lt;10xf32&gt;, tensor&lt;10xf32&gt;)\n    // `outs` is where we pass the \"outputs\", but that term has a subtle meaning\n    // in linalg. 
Here we are passing a tensor.empty, meaning just a placeholder\n    // for the output with no preexisting element values. In other examples with\n    // an accumulator, this is where the accumulator would be passed.\n    outs(%result_empty : tensor&lt;10xf32&gt;)\n    // End of parameters. The next {...} part is the \"code block\".\n  {\n    // bb0 is a code block taking one scalar from each input tensor as argument, and\n    // computing and \"yielding\" (ie returning) the corresponding output tensor element.\n    ^bb0(%lhs_entry : f32, %rhs_entry : f32, %unused_result_entry : f32):\n      %add = arith.addf %lhs_entry, %rhs_entry : f32\n      linalg.yield %add : f32\n  } // End of the basic block. Finally, we describe the return type.\n  -&gt; tensor&lt;10xf32&gt;\n\n  // End of the linalg.generic op.\n\n  // Return the function's return value.\n  return %result : tensor&lt;10xf32&gt;\n}\n</code></pre> <p>Compile it like this:</p> <pre><code>iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n</code></pre> <p>Note</p> <p>These are just minimalist <code>iree-compile</code> flags for running on CPU without trying to maximize performance.</p> <ul> <li>To run on GPU or other non-CPU targets, explore other values for   <code>--iree-hal-target-backends=</code>. You will then need to pass a matching   <code>--device=</code> to <code>iree-run-module</code> below.</li> <li>To cross-compile, explore <code>--iree-llvmcpu-target-triple=</code>.</li> <li>To enable higher CPU performance by enabling CPU features:<ul> <li>On x86, explore <code>--iree-llvmcpu-target-cpu=</code> (e.g.   <code>--iree-llvmcpu-target-cpu=znver4</code> to target AMD Zen4).</li> <li>On other architectures, explore <code>--iree-llvmcpu-target-cpu-features=</code>.</li> <li>To optimize for running on the same machine that the compilation ran   on, pass  <code>--iree-llvmcpu-target-cpu=host</code>. 
That works regardless of   CPU architecture.</li> </ul> </li> <li>Check out   these docs for   more useful <code>iree-compile</code> flags.</li> </ul> <p>Run it like this:</p> <pre><code>$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=10xf32=[0,1,2,3,4,5,6,7,8,9] \\\n  --input=10xf32=[90,80,70,60,50,40,30,20,10,0]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n10xf32=90 81 72 63 54 45 36 27 18 9\n</code></pre> <p>Here, each <code>--input</code> parameter specifies one input. First its shape and element type, <code>10xf32</code>, then the example array elements in <code>[...]</code> brackets. The output of <code>iree-run-module</code> above shows the contents of the result.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#dynamic-shape-element-wise-addition-of-two-1d-arrays","title":"Dynamic-shape, element-wise addition of two 1D arrays","text":"<p>While we are going to mostly focus on static shapes for simplicity in the rest of this tutorial, let us give one dynamic-shape example to at least show that that's not a problem. 
Here is the dynamic-shape equivalent of the previous example.</p> <pre><code>#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n\nfunc.func @foo(\n      %lhs : tensor&lt;?xf32&gt;,\n      %rhs : tensor&lt;?xf32&gt;\n    ) -&gt; tensor&lt;?xf32&gt; {\n  %c0f32 = arith.constant 0.0 : f32\n  %c0 = arith.constant 0 : index\n  %size = tensor.dim %lhs, %c0 : tensor&lt;?xf32&gt;\n  %result_empty =  tensor.empty(%size) : tensor&lt;?xf32&gt;\n\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_identity\n    ],\n    iterator_types=[\"parallel\"]\n  } ins(%lhs, %rhs : tensor&lt;?xf32&gt;, tensor&lt;?xf32&gt;)\n    outs(%result_empty : tensor&lt;?xf32&gt;)\n  {\n    ^bb0(%lhs_entry : f32, %rhs_entry : f32, %unused_result_entry : f32):\n      %add = arith.addf %lhs_entry, %rhs_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;?xf32&gt;\n\n  return %result : tensor&lt;?xf32&gt;\n}\n</code></pre> <p>This program can be compiled and run exactly like the previous one, except that now the <code>iree-run-module</code> command may specify inputs of arbitrary length. 
The only requirement is that both inputs have the same length, otherwise the <code>linalg.generic</code> will have undefined behavior.</p> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=10xf32=[0,1,2,3,4,5,6,7,8,9] \\\n  --input=10xf32=[90,80,70,60,50,40,30,20,10,0]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n10xf32=90 81 72 63 54 45 36 27 18 9\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#passing-one-of-the-inputs-in-outs","title":"Passing one of the inputs in <code>outs</code>","text":"<p>Here is a more concise variant achieving the same result in fewer lines of code, and giving us a first taste of what that <code>outs(...)</code> parameters list can do. We didn't want to show it first, because it's less idiomatic. <code>outs</code> will only become really necessary (and idiomatic) when we look at <code>reduction</code> iterators. In the previous examples, we had only passed a <code>tensor.empty</code> placeholder for <code>outs</code>. 
This new example shows that we can actually pass there any of the inputs that are shaped like the result.</p> <pre><code>#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n\nfunc.func @foo(\n      %lhs : tensor&lt;10xf32&gt;,\n      %rhs : tensor&lt;10xf32&gt;\n    ) -&gt; tensor&lt;10xf32&gt; {\n\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_identity\n    ],\n    iterator_types=[\"parallel\"]\n  } ins(%lhs : tensor&lt;10xf32&gt;)\n    outs(%rhs : tensor&lt;10xf32&gt;)\n  {\n    ^bb0(%lhs_entry : f32, %rhs_entry : f32):\n      %add = arith.addf %lhs_entry, %rhs_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;10xf32&gt;\n\n  return %result : tensor&lt;10xf32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=10xf32=[0,1,2,3,4,5,6,7,8,9] \\\n  --input=10xf32=[90,80,70,60,50,40,30,20,10,0]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n10xf32=90 81 72 63 54 45 36 27 18 9\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#a-first-reduction-example-summing-a-1d-array","title":"A first <code>reduction</code> example: summing a 1D array","text":"<p>This function takes a 1D array of floats and returns their sum. <code>tensor&lt;f32&gt;</code> is a 0-dimensional tensor type. We could as well extract the single <code>f32</code> element and return that, but we wanted to make this example as simple as possible.</p> <p>What's subtle here is how the <code>bb0</code> block in the <code>linalg.generic</code> now actively uses the <code>%result_entry</code> as an operand to <code>arith.addf</code>, yielding the result of this addition on every iteration. 
Implicitly, this stores the result of that addition to the destination, from where it is re-loaded on the next iteration again as <code>%result_entry</code>. So the SSA value <code>%result_entry</code> has a different value on each iteration.</p> <p>Because the values from the <code>outs</code> parameter are now actually used, we can't directly pass there the <code>tensor.empty</code>, whose elements are uninitialized. We have to initialize the result entries as zeroes, which is achieved by the <code>linalg.fill</code>.</p> <pre><code>#map_1d_identity = affine_map&lt;(m) -&gt; (m)&gt;\n#map_1d_proj_0d = affine_map&lt;(m) -&gt; ()&gt;\n\nfunc.func @foo(\n      %input : tensor&lt;10xf32&gt;) -&gt; tensor&lt;f32&gt; {\n  %result_empty = tensor.empty() : tensor&lt;f32&gt;\n  %cst_0 = arith.constant 0.0 : f32\n  %result_init = linalg.fill ins(%cst_0 : f32) outs(%result_empty : tensor&lt;f32&gt;) -&gt; tensor&lt;f32&gt;\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_1d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_1d_proj_0d\n    ],\n    iterator_types=[\"reduction\"]\n  } ins(%input : tensor&lt;10xf32&gt;)\n    outs(%result_init : tensor&lt;f32&gt;)\n  {\n    ^bb0(%input_entry : f32, %result_entry : f32):\n      %add = arith.addf %input_entry, %result_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;f32&gt;\n\n  return %result : tensor&lt;f32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb --input=10xf32=[0,1,2,3,4,5,6,7,8,9]\n\nEXEC @foo\nresult[0]: hal.buffer_view\nf32=45\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#combining-parallel-and-reduction-iterators-summing-each-row-of-a-2d-array","title":"Combining <code>parallel</code> and <code>reduction</code> iterators: summing 
each row of a 2D array.","text":"<p>This is our first 2D example so for the first time we have to start explaining how the <code>iterator_types</code> are enumerated and we start seeing some more interesting examples of <code>affine_map</code>.</p> <pre><code>#map_2d_identity = affine_map&lt;(m, n) -&gt; (m, n)&gt;\n#map_2d_proj_first = affine_map&lt;(m, n) -&gt; (m)&gt;\n\nfunc.func @foo(\n      %input : tensor&lt;3x5xf32&gt;) -&gt; tensor&lt;3xf32&gt; {\n  %result_empty = tensor.empty() : tensor&lt;3xf32&gt;\n  %cst_0 = arith.constant 0.0 : f32\n  %result_init = linalg.fill ins(%cst_0 : f32) outs(%result_empty : tensor&lt;3xf32&gt;) -&gt; tensor&lt;3xf32&gt;\n  %result = linalg.generic {\n    indexing_maps=[\n      // Indexing maps for the parameters listed in `ins(...)`\n      #map_2d_identity,\n      // Indexing maps for the parameters listed in `outs(...)`\n      #map_2d_proj_first\n    ],\n    iterator_types=[\n      // Rule: the i-th iterator_type corresponds to the i-th coordinate in the\n      // source space of the affine_maps defined above, (m, n). 
So:\n      \"parallel\",  // This refers to the `m` coordinate in the affine-maps.\n                   // This is the coordinate that is preserved in the result,\n                   // see the map_2d_proj_first map given above.\n      \"reduction\" // This refers to the `n` coordinate in the affine-maps.\n                  // This is the coordinate that is dropped by the map_2d_proj_first\n                  // given above and thus not present in the 1D result.\n    ]\n  } ins(%input : tensor&lt;3x5xf32&gt;)\n    outs(%result_init : tensor&lt;3xf32&gt;)\n  {\n    ^bb0(%input_entry : f32, %result_entry : f32):\n      %add = arith.addf %input_entry, %result_entry : f32\n      linalg.yield %add : f32\n  }\n  -&gt; tensor&lt;3xf32&gt;\n\n  return %result : tensor&lt;3xf32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=3x5xf32=[[0,1,2,3,4],[5,6,7,8,9],[10,11,12,13,14]]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n3xf32=10 35 60\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#matrix-multiplication-as-a-linalgmatmul-and-as-a-linalggeneric","title":"Matrix multiplication as a <code>linalg.matmul</code> and as a <code>linalg.generic</code>","text":"<p>We are now ready to see how to express matrix multiplication as a <code>linalg.generic</code>. But actually, rather than just writing that by hand, we are going to let Linalg do it for us. Indeed, in addition to <code>linalg.generic</code>, Linalg contains a number of \"named ops\", which are essentially just short-hand notation for special cases of <code>linalg.generic</code>. One of them is <code>linalg.matmul</code>, doing matrix multiplication accumulating into an existing accumulator. Here is a simple function performing a matrix-multiplication-with-accumulation using <code>linalg.matmul</code>. 
Also in this example, we use dynamic shapes (the <code>?</code> in the shapes, see the above section where we encountered that), but we could just as well use static shapes.</p> <pre><code>func.func @foo(%lhs: tensor&lt;?x?xf32&gt;, %rhs: tensor&lt;?x?xf32&gt;, %acc: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt; {\n  %result = linalg.matmul\n    ins(%lhs, %rhs: tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;)\n    outs(%acc: tensor&lt;?x?xf32&gt;)\n  -&gt; tensor&lt;?x?xf32&gt;\n  return %result: tensor&lt;?x?xf32&gt;\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=2x2xf32=[[1,2][3,4]] \\\n  --input=2x2xf32=[[1,4][3,2]] \\\n  --input=2x2xf32=[[0,0][0,0]]\n\nEXEC @matmul_dynamic\nresult[0]: hal.buffer_view\n2x2xf32=[7 8][15 20]\n</code></pre> <p>Now we encounter another IREE tool: <code>iree-opt</code>. Unlike <code>iree-compile</code> which compiles a MLIR program all the way down to a <code>.vmfb</code> that's ready to run on the target device, <code>iree-opt</code> only applies selected transformations.</p> <p>We run:</p> <pre><code>iree-opt --linalg-generalize-named-ops prog.mlir\n</code></pre> <p>And that prints:</p> <pre><code>#map = affine_map&lt;(d0, d1, d2) -&gt; (d0, d2)&gt;\n#map1 = affine_map&lt;(d0, d1, d2) -&gt; (d2, d1)&gt;\n#map2 = affine_map&lt;(d0, d1, d2) -&gt; (d0, d1)&gt;\nmodule {\n  func.func @foo(%arg0: tensor&lt;?x?xf32&gt;, %arg1: tensor&lt;?x?xf32&gt;, %arg2: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt; {\n    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = [\"parallel\", \"parallel\", \"reduction\"]} ins(%arg0, %arg1 : tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;) outs(%arg2 : tensor&lt;?x?xf32&gt;) {\n    ^bb0(%in: f32, %in_0: f32, %out: f32):\n      %1 = arith.mulf %in, %in_0 : f32\n      %2 = arith.addf %out, %1 : f32\n      linalg.yield %2 : f32\n    } -&gt; tensor&lt;?x?xf32&gt;\n    
return %0 : tensor&lt;?x?xf32&gt;\n  }\n}\n</code></pre> <p>So that's the <code>linalg.generic</code> implementing matrix multiplication equivalently to the above <code>linalg.matmul</code> form. We can  compile and run that like the above program and it will have exactly the same result.</p> <p>Here the 3 listed <code>iterator_types</code>, <code>[\"parallel\", \"parallel\", \"reduction\"]</code>, correspond to the 3 listed coordinates in the <code>affine_map</code>'s, <code>(d0, d1, d2)</code>. So, <code>d0</code> and <code>d1</code> are parallel dimensions and <code>d2</code> is the reduction dimension. That's why the first two <code>affine_map</code>'s results involve <code>d2</code> (they are respectively for the LHS <code>%arg0</code> and RHS <code>%arg1</code>) and the last <code>affine_map</code>'s result only involves the parallel <code>d0</code> and <code>d1</code>, as it refers to the result matrix.</p> <p>Note</p> <p>Some current IREE compiler optimizations are only triggering on named ops like <code>linalg.matmul</code>, not on the equivalent <code>linalg.generic</code> form. Think of that as a non-essential current limitation, and the intent is over time to overcome these, but in the near term do use <code>linalg.matmul</code> when performance matters.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-29-iree-mlir-linalg-tutorial/#integer-element-types","title":"Integer element types","text":"<p>MLIR defines integer types for absolutely any bit-width, including non-power-of-two bit-widths, and in three signedness flavors:</p> <ul> <li>Signed integers, indicated by the letters <code>si</code>.</li> <li>Unsigned integers, indicated by the letters <code>ui</code>.</li> <li>Sign-less integers indicated by the letter <code>i</code>. 
\"Sign-less\" means that the   integer type does not convey signedness; the integer value may be used as   either a signed or an unsigned value but that's a property of the operation   using that value as an operand, that's not encoded in the type.</li> </ul> <p>So for instance, <code>si16</code> is the 16-bit signed integer type, <code>ui24</code> is the 24-bit unsigned integer type, and <code>i8</code> is the sign-less 8-bit integer type.</p> <p>Now here is a very important principle of how the MLIR dialects that are relevant to us in IREE operate:</p> <p>Note</p> <p>Only use sign-less types. Always encode signedness in operations, not in types.</p> <p>For example, here is how we perform a matrix multiplication where the LHS is signed 8-bit integers, the RHS is unsigned 8-bit integers, and the accumulator is signed 32-bit integers. Notice how the fact that LHS is signed and the RHS is unsigned is encoded only in the implementation of the <code>linalg.generic</code> basic block, where the LHS and RHS entries are extended, respectively as signed (<code>arith.extsi</code>) and unsigned (<code>arith.extui</code>):</p> <pre><code>#map = affine_map&lt;(d0, d1, d2) -&gt; (d0, d2)&gt;\n#map1 = affine_map&lt;(d0, d1, d2) -&gt; (d2, d1)&gt;\n#map2 = affine_map&lt;(d0, d1, d2) -&gt; (d0, d1)&gt;\nmodule {\n  func.func @foo(%lhs: tensor&lt;?x?xi8&gt;, %rhs: tensor&lt;?x?xi8&gt;, %acc: tensor&lt;?x?xi32&gt;) -&gt; tensor&lt;?x?xi32&gt; {\n    %result = linalg.generic\n      {indexing_maps = [#map, #map1, #map2],\n       iterator_types = [\"parallel\", \"parallel\", \"reduction\"]}\n      ins(%lhs, %rhs : tensor&lt;?x?xi8&gt;, tensor&lt;?x?xi8&gt;)\n      outs(%acc : tensor&lt;?x?xi32&gt;) {\n    ^bb0(%lhs_entry: i8, %rhs_entry: i8, %acc_entry: i32):\n      %lhs_extended = arith.extsi %lhs_entry : i8 to i32\n      %rhs_extended = arith.extui %rhs_entry : i8 to i32\n      %mul = arith.muli %lhs_extended, %rhs_extended : i32\n      %add = arith.addi %acc_entry, %mul : i32\n      
linalg.yield %add : i32\n    } -&gt; tensor&lt;?x?xi32&gt;\n    return %result : tensor&lt;?x?xi32&gt;\n  }\n}\n</code></pre> <pre><code>$ iree-compile --iree-hal-target-backends=llvm-cpu prog.mlir -o /tmp/prog.vmfb\n$ iree-run-module --module=/tmp/prog.vmfb \\\n  --input=2x2xi8=[[-1,-2][-3,-4]] \\\n  --input=2x2xi8=[[1,4][3,2]] \\\n  --input=2x2xi32=[[0,0][0,0]]\n\nEXEC @foo\nresult[0]: hal.buffer_view\n2x2xi32=[-7 -8][-15 -20]\n</code></pre> <p>Note</p> <p>A current runtime limitation, https://github.com/iree-org/iree/issues/16241, prevents passing sub-byte-bit-width integers on the <code>iree-run-module</code> command line.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/","title":"Exploring CPU microkernels on a matmul example","text":"","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#basic-setup-command-lines","title":"Basic setup, command lines","text":"<p>Source file: <code>matmul.mlir</code>:</p> <pre><code>func.func @matmul_dynamic(%lhs: tensor&lt;?x?xf32&gt;, %rhs: tensor&lt;?x?xf32&gt;, %acc: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt; {\n  %result = linalg.matmul ins(%lhs, %rhs: tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;) outs(%acc: tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt;\n  return %result: tensor&lt;?x?xf32&gt;\n}\n</code></pre> <p>Basic compilation command line:</p> <pre><code>$ iree-compile matmul.mlir -o /tmp/matmul.vmfb \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-llvmcpu-target-cpu=znver4 \\\n  --iree-llvmcpu-enable-ukernels=all\n</code></pre> <p>This creates a IREE bytecode module:</p> <pre><code>$ ls -l /tmp/matmul.vmfb\n\n-rw-rw-r-- 1 2884 Jan 22 10:37 /tmp/matmul.vmfb\n</code></pre> <p>The above <code>.vmfb</code> is the only thing that's needed to run this matmul on the target device. 
But to understand microkernels, we are now going to generate additional intermediate files.</p> <p>Additional <code>iree-compile</code> flags to save intermediate files (IR, assembly, object code):</p> <pre><code>--iree-hal-dump-executable-intermediates-to=/tmp/matmul --x86-asm-syntax=intel\n</code></pre> <p>This saves LLVM IR in binary serialization (\"bitcode\", filename extension <code>.bc</code>). To read it, we need to \"disassemble\" it using <code>llvm-dis</code> to obtain textual IR (filename extension <code>.ll</code>).</p> <pre><code>llvm-dis /tmp/matmul/*.bc\n</code></pre> <p>Intermediate files:</p> <pre><code>  35196 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.codegen.bc\n 251597 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.codegen.ll\n 181740 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.linked.bc\n1396190 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.linked.ll\n  32096 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.o\n  34504 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.optimized.bc\n 184981 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.optimized.ll\n  82016 /tmp/matmul/module_matmul_linked_llvm_cpu_embedded_elf_x86_64.s\n</code></pre> <p>Another important <code>iree-compile</code> flag: <code>--mlir-print-ir-after-all</code> records the IR after each pass. 
We save that (stderr) output to a file, <code>ir.log</code> by appending to the <code>iree-compile</code> command line:</p> <pre><code>--mlir-print-ir-after-all 2&gt;/tmp/matmul/ir.log\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#overview-of-the-compilation-and-linking-flow","title":"Overview of the compilation and linking flow","text":"<p>This graph shows the transformations from the source <code>matmul.mlir</code> to the final <code>matmul.vmfb</code> with the various intermediates met in the previous section:</p> <pre><code>graph TD;\nmatmulontensors-- CPUMaterializeEncoding --&gt;mmt4dontensors;\nmmt4dontensors-- CPULowerToUKernels --&gt;ukernelontensors;\nukernelontensors-- IREEComprehensiveBufferize --&gt;ukernelonmemref;\nukernelonmemref-- LowerUKernelOpsToCalls --&gt;ukernelcall;\nukernelcall-- ConvertToLLVM --&gt;codegenll;\ncodegenll--&gt;bitcodelinking;\ngenericsource-- clang -emit-llvm --&gt; genericbitcode -- llvm-link --&gt; ukernelbitcode;\narchsource -- clang -emit-llvm --&gt; archbitcode -- llvm-link --&gt; ukernelbitcode;\nukernelbitcode--&gt;ukernelbitcodeembedded;\nukernelbitcodeembedded--&gt;bitcodelinking;\nbitcodelinking--&gt;linkedll;\nlinkedll -- IR optimization --&gt; optimizedll;\noptimizedll -- LLVM x86 backend --&gt; asm -- LLVM assembler --&gt; object -- iree-compile output --&gt; vmfb;\nmatmulontensors[\"linalg.matmul on tensors\"];\nmmt4dontensors[\"linalg.mmt4d on tensors\"];\nukernelontensors[\"ukernel.generic on tensors\"];\nukernelonmemref[\"ukernel.generic on memrefs\"];\nukernelcall[\"call to ukernel entry point\"];\ncodegenll[\"module_matmul_...codegen.ll\"];\nlinkedll[\"module_matmul_...linked.ll\"];\noptimizedll[\"module_matmul_...optimized.ll\"];\ngenericsource[\"generic source code\nmmt4d.c\"]\narchsource[\"architecture-specific source code\nmmt4d_x86_64_avx512_base.c\"]\ngenericbitcode[\"generic code as 
bitcode\nukernel_bitcode_generic_x86_64.bc\"]\narchbitcode[\"architecture-specific code as bitcode\nukernel_bitcode_arch_x86_64_avx512_base.bc\"]\nukernelbitcode[\"linked bitcode\nukernel_bitcode_x86_64.bc\"];\nukernelbitcodeembedded[\"microkernel bitcode embedded as\nstatic data in iree-compile\"];\nbitcodelinking[\"llvm::Linker::LinkInModule\"];\nasm[\"x86 asm, module_matmul_...s\"];\nobject[\"x86 ELF, module_matmul_...o\"];\nvmfb[\"matmul.vmfb\"];\n\nsubgraph Part1[\"Part 1: MLIR code generation\"]\n  matmulontensors\n  mmt4dontensors\n  ukernelontensors\n  ukernelonmemref\n  ukernelcall\n  codegenll\nend\n\nsubgraph Part2[\"Part 2: Microkernels compilation (part of the IREE build)\"]\n  genericsource\n  archsource\n  genericbitcode\n  archbitcode\n  ukernelbitcode\n  ukernelbitcodeembedded\nend\n\nsubgraph Part3[\"Part 3: Linking with microkernels, optimizing, producing object code\"]\n  bitcodelinking\n  linkedll\n  optimizedll\n  asm\n  object\n  vmfb\nend\n\nstyle Part1 stroke:#FDD835,stroke-width:2px\nstyle Part2 stroke:#039BE5,stroke-width:2px\nstyle Part3 stroke:#43A047,stroke-width:2px</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#part-1-mlir-code-generation","title":"\ud83d\udfe8 Part 1: MLIR code generation","text":"<p>Some initial boilerplate happens around our <code>linalg.matmul</code> before anything interesting happens to it.:</p> <p>\u27a4 Appendix: IR dump after WrapEntryPointsPass</p> <p>Next, the first interesting thing is the <code>CPUMaterializeEncoding</code> pass, where the <code>linalg.matmul</code> gets rewritten into a <code>linalg.mmt4d</code> which is a matmul with a tiled data layout. 
This is where we start specializing to the target ISA feature set, AVX-512, favoring a 16x16 tile size for this float32 matmul.</p> <p>\u27a4 Appendix: IR Dump After CPUMaterializeEncoding</p> <p>The idea is that <code>linalg.mmt4d</code> is what we will have a microkernel for, below. There is no need to have microkernels for anything but the target-optimal tiled layout, so we don't bother carrying a microkernel for <code>linalg.matmul</code> itself. The matrix layout transformation, bringing matrix data into this tiled layout, is also out of the scope of this <code>linalg.mmt4d</code> and hence of the <code>mmt4d</code> microkernel: we can rely on generic code-generation to take care of these byte-permutations, which is our preference as we aim to let that fuse into producers/consumers.</p> <p>Next comes the rewrite of <code>linalg.mmt4d</code> into a microkernel op, done by the <code>CPULowerToUKernels</code> pass. Here is the TableGen definition of the generic microkernel op we're going to generate:</p> <p>TableGen definition of <code>ukernel.generic</code></p> <p>C++ compiler code for CPULowerToUKernels</p> <p>\u27a4 Appendix: IR Dump After CPULowerToUKernels</p> <p>Notice that this IR is still working on <code>tensor</code> values, not on <code>memref</code> values.</p> <ul> <li>Rewrites are much nicer to perform on tensors than on memrefs.</li> <li><code>ukernel.generic</code> works with both tensors and memrefs.</li> <li>Allows performing the rewrite to <code>ukernel.generic</code> while still on tensors,   then just ride bufferization.</li> </ul> <p>Next, bufferization takes place. 
<code>tensor</code> values become <code>memref</code>.</p> <p>\u27a4 Appendix: IR Dump After IREEComprehensiveBufferize</p> <p>Next, the <code>LowerUKernelOpsToCalls</code> runs, rewriting <code>ukernel.generic</code> ops into function calls.</p> <ul> <li>Made possible by bufferization: there now are buffer pointers and strides to   pass to the target function.</li> </ul> <p>\u27a4 Appendix: IR Dump After LowerUKernelOpsToCalls</p> <p>Finally, this gets lowered to the MLIR LLVM dialect, in preparation for outputting plain LLVM IR.</p> <p>\u27a4 Appendix: IR Dump After ConvertToLLVM</p> <p>The above gets converted to plain LLVM IR and that's our first intermediate file, <code>module_matmul_linked_llvm_cpu_embedded_elf_x86_64.codegen.bc</code>, which <code>llvm-dis</code> helps disassemble into a textual IR file (<code>.ll</code>).</p> <p>\u27a4 Appendix: Intermediate file: <code>...codegen.bc</code>, disassembled to <code>...codegen.ll</code></p> <p>The above IR references an external symbol <code>iree_uk_mmt4d</code> for the microkernel that it calls, so it now needs to be linked against the ukernels bitcode.</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#part-2-microkernels-compilation-part-of-the-iree-build","title":"\ud83d\udfe6 Part 2: Microkernels compilation (part of the IREE build)","text":"<p>Microkernels are:</p> <ul> <li>Compiled to self-contained bitcode, once for each target architecture.<ul> <li>That puts requirement on the source languages that they can be defined in.<ul> <li>Can use C via <code>clang -emit-llvm</code> plus extra flags like <code>-ffreestanding</code>.<ul> <li>The source must not <code>#include</code> standard library headers or do   anything OS-specific.</li> </ul> </li> <li>Can use inline assembly but not out-of-line assembly.</li> </ul> </li> </ul> </li> <li>Taking scalar parameters, including buffer pointers and strides.<ul> <li>Array-processing microkernels have a 
memory-to-memory interface.</li> <li>No vector-to-vector microkernels.<ul> <li>Store-to-load-forwarding can still happen post-linking, effectively   achieving the same.</li> <li>Microkernel ops avoid MLIR vector dialect altogether.</li> </ul> </li> </ul> </li> </ul> <p>C source code for the <code>iree_uk_mmt4d</code> microkernel entry point</p> <p>This calls an architecture-specific function to return a function pointer to the optimized inner-loop implementation to use for given data types and SIMD ISA features, and then uses that in a generic outer-loop implementation.</p> <p>So the really interesting part is the implementation of the inner-loop function that we got a function pointer to. For example, here is the one used in our example where the element type is <code>f32</code> and the target has AVX-512.</p> <p>A custom CMake function, <code>iree_bitcode_library</code>, wraps <code>clang</code> to compile these C source files with special flags to obtain freestanding bitcode.</p> <p>Likewise, a custom CMake function, <code>iree_link_bitcode</code>, wraps <code>llvm-link</code> to link bitcode files.</p> <p>These are used during the IREE compiler build (as a dependency of <code>iree-compile</code>) to build microkernels as bitcode for all supported target architectures, generating one bitcode file for each architecture in the build directory:</p> <pre><code>~/iree-build$ ls ./runtime/src/iree/builtins/ukernel/ukernel_bitcode_*.bc | grep -v generic\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_arm_32.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_arm_64.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_riscv_32.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_riscv_64.bc\n./runtime/src/iree/builtins/ukernel/ukernel_bitcode_x86_64.bc\n</code></pre> <p>These files are then embedded as static data within <code>iree-compile</code>, so that <code>iree-compile</code> stays self-contained.</p> <p>Here are some samples of ukernel bitcode if 
you are curious what it looks like:</p> <p>\u27a4 Appendix: embedded microkernel bitcode: <code>iree_uk_mmt4d</code> ukernel entry point</p> <p>\u27a4 Appendix: embedded microkernel bitcode: inner-loop tile function</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#part-3-linking-with-microkernels-optimizing-producing-object-code","title":"\ud83d\udfe9 Part 3: Linking with microkernels, optimizing, producing object code","text":"<p>The previous two sections covered respectively the compilation of the MLIR module, and the compilation of microkernels, as two separate bitcode modules. Now we turn to how these two bitcode modules are linked together.</p> <p>After code generation, <code>iree-compile</code> loads microkernel bitcode: https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp#L490</p> <p>It is worth zooming into that <code>loadUKernelBitcode</code> function as, in addition to just loading the bitcode, it does one important thing: it adds the <code>alwaysinline</code> attribute on every function. As we will see just below, always inlining microkernels is key to achieving perfect results with no downsides compared to a pure code-generation approach. https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.cpp#L36-L62</p> <p>And links it into the current module: https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp#L499</p> <p>The linked IR so far is not very interesting, as it is still essentially just the concatenation of the above-discussed codegen and microkernel bitcode (except now with <code>alwaysinline</code> attributes). 
If you are curious, it is dumped as the <code>...linked.bc</code> file.</p> <p>Where it gets interesting is that immediately after that, we run LLVM IR optimization passes, which can be thought of as a form of link-time optimization (LTO): https://github.com/iree-org/iree/blob/c437add6a3b1e3e873cec95505d37c4938fee74f/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp#L527</p> <p>At this point, all the microkernel code gets inlined into the dispatch function, the correct AVX-512 optimized tile function is selected and inlined, and everything else is DCE'd. That's how the user pays no cost for what they don't use --- not only for the microkernel entry points that they don't call, but also for all the unused code paths within each microkernel.</p> <p>\u27a4 Appendix: Intermediate file: <code>...optimized.bc</code>, disassembled to <code>...optimized.ll</code></p> <p>This then goes to the LLVM x86 backend, which produces x86 assembly.</p> <p>\u27a4 Appendix: x86 assembly</p>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#appendix","title":"Appendix","text":"","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-wrapentrypointspass","title":"IR dump after WrapEntryPointsPass","text":"<pre><code>// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass (iree-abi-wrap-entry-points) //----- //\n[...]\n// -----// IR Dump After Inliner (inline) //----- //\n#executable_target_embedded_elf_x86_64_ = #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-x86_64\", {cpu = \"znver4\", cpu_features = 
\"+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+mwaitx,+pku,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+shstk,+vaes,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,+evex512\", data_layout = \"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\", native_vector_size = 64 : index, target_triple = \"x86_64-unknown-unknown-eabi-elf\", ukernels = \"all\"}&gt;\n#device_target_llvm_cpu = #hal.device.target&lt;\"llvm-cpu\", {executable_targets = [#executable_target_embedded_elf_x86_64_]}&gt;\nmodule attributes {hal.device.targets = [#device_target_llvm_cpu]} {\n  func.func @matmul_dynamic(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -&gt; !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = \"sync func @matmul_dynamic(%input0: tensor&lt;?x?xf32&gt;, %input1: tensor&lt;?x?xf32&gt;, %input2: tensor&lt;?x?xf32&gt;) -&gt; (%output0: tensor&lt;?x?xf32&gt;)\"}} {\n    %0 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[0] : index\n    %1 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[1] : index\n    %2 = hal.tensor.import %arg0 \"input0\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%0, %1}\n    %3 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[0] : index\n    %4 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[1] : index\n    %5 = hal.tensor.import %arg1 \"input1\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%3, %4}\n    %6 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[0] : index\n    %7 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[1] : index\n    %8 = hal.tensor.import %arg2 \"input2\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%6, 
%7}\n    %9 = linalg.matmul ins(%2, %5 : tensor&lt;?x?xf32&gt;, tensor&lt;?x?xf32&gt;) outs(%8 : tensor&lt;?x?xf32&gt;) -&gt; tensor&lt;?x?xf32&gt;\n    %10 = hal.tensor.export %9 \"output0\" : tensor&lt;?x?xf32&gt;{%6, %7} -&gt; !hal.buffer_view\n    return %10 : !hal.buffer_view\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-cpumaterializeencoding","title":"IR Dump After CPUMaterializeEncoding","text":"<pre><code>// -----// IR Dump After CPUMaterializeEncoding (iree-codegen-cpu-materialize-encoding) //----- //\n[...]\n// -----// IR Dump After Canonicalizer (canonicalize) //----- //\n[...]\n// -----// IR Dump After CSE (cse) //----- //\n#executable_target_embedded_elf_x86_64_ = #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-x86_64\", {cpu = \"znver4\", cpu_features = \"+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+mwaitx,+pku,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+shstk,+vaes,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,+evex512\", data_layout = \"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\", native_vector_size = 64 : index, target_triple = \"x86_64-unknown-unknown-eabi-elf\", ukernels = \"all\"}&gt;\n#map = affine_map&lt;()[s0] -&gt; (s0 ceildiv 16)&gt;\n#device_target_llvm_cpu = #hal.device.target&lt;\"llvm-cpu\", {executable_targets = [#executable_target_embedded_elf_x86_64_]}&gt;\nmodule attributes {hal.device.targets = [#device_target_llvm_cpu]} {\n  func.func @matmul_dynamic(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -&gt; !hal.buffer_view attributes {iree.abi.stub, 
iree.reflection = {iree.abi.declaration = \"sync func @matmul_dynamic(%input0: tensor&lt;?x?xf32&gt;, %input1: tensor&lt;?x?xf32&gt;, %input2: tensor&lt;?x?xf32&gt;) -&gt; (%output0: tensor&lt;?x?xf32&gt;)\"}} {\n    %cst = arith.constant 0.000000e+00 : f32\n    %0 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[0] : index\n    %1 = hal.buffer_view.dim&lt;%arg0 : !hal.buffer_view&gt;[1] : index\n    %2 = hal.tensor.import %arg0 \"input0\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%0, %1}\n    %3 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[0] : index\n    %4 = hal.buffer_view.dim&lt;%arg1 : !hal.buffer_view&gt;[1] : index\n    %5 = hal.tensor.import %arg1 \"input1\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%3, %4}\n    %6 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[0] : index\n    %7 = hal.buffer_view.dim&lt;%arg2 : !hal.buffer_view&gt;[1] : index\n    %8 = hal.tensor.import %arg2 \"input2\" : !hal.buffer_view -&gt; tensor&lt;?x?xf32&gt;{%6, %7}\n    %9 = affine.apply #map()[%0]\n    %10 = tensor.empty(%9, %1) : tensor&lt;?x?x16x1xf32&gt;\n    %pack = tensor.pack %2 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %10 : tensor&lt;?x?xf32&gt; -&gt; tensor&lt;?x?x16x1xf32&gt;\n    %11 = affine.apply #map()[%4]\n    %12 = tensor.empty(%11, %3) : tensor&lt;?x?x16x1xf32&gt;\n    %pack_0 = tensor.pack %5 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %12 : tensor&lt;?x?xf32&gt; -&gt; tensor&lt;?x?x16x1xf32&gt;\n    %13 = affine.apply #map()[%6]\n    %14 = affine.apply #map()[%7]\n    %15 = tensor.empty(%13, %14) : tensor&lt;?x?x16x16xf32&gt;\n    %pack_1 = tensor.pack %8 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 : tensor&lt;?x?xf32&gt; -&gt; tensor&lt;?x?x16x16xf32&gt;\n    %16 = linalg.mmt4d ins(%pack, %pack_0 : tensor&lt;?x?x16x1xf32&gt;, 
tensor&lt;?x?x16x1xf32&gt;) outs(%pack_1 : tensor&lt;?x?x16x16xf32&gt;) -&gt; tensor&lt;?x?x16x16xf32&gt;\n    %17 = tensor.empty(%6, %7) : tensor&lt;?x?xf32&gt;\n    %unpack = tensor.unpack %16 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %17 : tensor&lt;?x?x16x16xf32&gt; -&gt; tensor&lt;?x?xf32&gt;\n    %18 = hal.tensor.export %unpack \"output0\" : tensor&lt;?x?xf32&gt;{%6, %7} -&gt; !hal.buffer_view\n    return %18 : !hal.buffer_view\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-cpulowertoukernels","title":"IR Dump After CPULowerToUKernels","text":"<pre><code>// -----// IR Dump After CPULowerToUKernels (iree-codegen-cpu-lower-to-ukernels) //----- //\nmodule {\n  func.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32() {\n    %c1281_i32 = arith.constant 1281 : i32\n    %c1_i32 = arith.constant 1 : i32\n    %c16_i32 = arith.constant 16 : i32\n    %c1 = arith.constant 1 : index\n    %c0 = arith.constant 0 : index\n    %c32_i64 = arith.constant 32 : i64\n    %0 = hal.interface.constant.load[0] : i32\n    %1 = hal.interface.constant.load[1] : i32\n    %2 = hal.interface.constant.load[2] : i32\n    %3 = hal.interface.constant.load[3] : i32\n    %4 = hal.interface.constant.load[4] : i32\n    %5 = hal.interface.constant.load[5] : i32\n    %6 = hal.interface.constant.load[6] : i32\n    %7 = hal.interface.constant.load[7] : i32\n    %8 = hal.interface.constant.load[8] : i32\n    %9 = hal.interface.constant.load[9] : i32\n    %10 = hal.interface.constant.load[10] : i32\n    %11 = hal.interface.constant.load[11] : i32\n    %12 = hal.interface.constant.load[12] : i32\n    %13 = hal.interface.constant.load[13] : i32\n    %14 = hal.interface.constant.load[14] : i32\n    %15 = hal.interface.constant.load[15] : i32\n    %16 = arith.extui %0 : i32 to i64\n    %17 = arith.extui %1 : i32 to i64\n    %18 = arith.shli %17, %c32_i64 : i64\n    %19 = 
arith.ori %16, %18 : i64\n    %20 = arith.index_castui %19 : i64 to index\n    %21 = arith.extui %2 : i32 to i64\n    %22 = arith.extui %3 : i32 to i64\n    %23 = arith.shli %22, %c32_i64 : i64\n    %24 = arith.ori %21, %23 : i64\n    %25 = arith.index_castui %24 : i64 to index\n    %26 = arith.extui %4 : i32 to i64\n    %27 = arith.extui %5 : i32 to i64\n    %28 = arith.shli %27, %c32_i64 : i64\n    %29 = arith.ori %26, %28 : i64\n    %30 = arith.index_castui %29 : i64 to index\n    %31 = arith.extui %6 : i32 to i64\n    %32 = arith.extui %7 : i32 to i64\n    %33 = arith.shli %32, %c32_i64 : i64\n    %34 = arith.ori %31, %33 : i64\n    %35 = arith.index_castui %34 : i64 to index\n    %36 = arith.extui %8 : i32 to i64\n    %37 = arith.extui %9 : i32 to i64\n    %38 = arith.shli %37, %c32_i64 : i64\n    %39 = arith.ori %36, %38 : i64\n    %40 = arith.index_castui %39 : i64 to index\n    %41 = arith.extui %10 : i32 to i64\n    %42 = arith.extui %11 : i32 to i64\n    %43 = arith.shli %42, %c32_i64 : i64\n    %44 = arith.ori %41, %43 : i64\n    %45 = arith.index_castui %44 : i64 to index\n    %46 = arith.extui %12 : i32 to i64\n    %47 = arith.extui %13 : i32 to i64\n    %48 = arith.shli %47, %c32_i64 : i64\n    %49 = arith.ori %46, %48 : i64\n    %50 = arith.index_castui %49 : i64 to index\n    %51 = arith.extui %14 : i32 to i64\n    %52 = arith.extui %15 : i32 to i64\n    %53 = arith.shli %52, %c32_i64 : i64\n    %54 = arith.ori %51, %53 : i64\n    %55 = arith.index_castui %54 : i64 to index\n    %56 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%30, %35}\n    %57 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%20) flags(ReadOnly) : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%40, %45}\n    %58 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) 
alignment(64) offset(%25) : !flow.dispatch.tensor&lt;readwrite:tensor&lt;?x?x16x16xf32&gt;&gt;{%50, %55}\n    %workgroup_id_x = hal.interface.workgroup.id[0] : index\n    %workgroup_count_x = hal.interface.workgroup.count[0] : index\n    %workgroup_id_y = hal.interface.workgroup.id[1] : index\n    %workgroup_count_y = hal.interface.workgroup.count[1] : index\n    scf.for %arg0 = %workgroup_id_y to %30 step %workgroup_count_y {\n      scf.for %arg1 = %workgroup_id_x to %40 step %workgroup_count_x {\n        %59 = flow.dispatch.tensor.load %56, offsets = [%arg0, 0, 0, 0], sizes = [1, %35, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%30, %35} -&gt; tensor&lt;1x?x16x1xf32&gt;\n        %60 = flow.dispatch.tensor.load %57, offsets = [%arg1, 0, 0, 0], sizes = [1, %35, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor&lt;readonly:tensor&lt;?x?x16x1xf32&gt;&gt;{%40, %45} -&gt; tensor&lt;1x?x16x1xf32&gt;\n        %61 = flow.dispatch.tensor.load %58, offsets = [%arg0, %arg1, 0, 0], sizes = [1, 1, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor&lt;readwrite:tensor&lt;?x?x16x16xf32&gt;&gt;{%50, %55} -&gt; tensor&lt;1x1x16x16xf32&gt;\n        %dim = tensor.dim %60, %c1 : tensor&lt;1x?x16x1xf32&gt;\n        %62 = iree_codegen.ukernel.generic \"iree_uk_mmt4d\" ins(%59, %60 : tensor&lt;1x?x16x1xf32&gt;, tensor&lt;1x?x16x1xf32&gt;) outs(%61 : tensor&lt;1x1x16x16xf32&gt;) (%c1, %c1, %dim, %c16_i32, %c16_i32, %c1_i32, %c1281_i32 : index, index, index, i32, i32, i32, i32) fn_def_attrs {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"]} strided_outer_dims(1) -&gt; tensor&lt;1x1x16x16xf32&gt;\n        flow.dispatch.tensor.store %62, %58, offsets = [%arg0, %arg1, 0, 0], sizes = [1, 1, 16, 16], strides = [1, 1, 1, 1] : tensor&lt;1x1x16x16xf32&gt; -&gt; !flow.dispatch.tensor&lt;readwrite:tensor&lt;?x?x16x16xf32&gt;&gt;{%50, %55}\n      }\n    }\n    return\n  
}\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-ireecomprehensivebufferize","title":"IR Dump After IREEComprehensiveBufferize","text":"<pre><code>// -----// IR Dump After IREEComprehensiveBufferize (iree-codegen-iree-comprehensive-bufferize) //----- //\n[...]\n// -----// IR Dump After EmptyTensorToAllocTensor (empty-tensor-to-alloc-tensor) //----- //\n[...]\n// -----// IR Dump After ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- //\n[...]\n// -----// IR Dump After Canonicalizer (canonicalize) //----- //\n[...]\n// -----// IR Dump After CSE (cse) //----- //\n[...]\n// -----// IR Dump After CleanupBufferAllocView (iree-codegen-cleanup-buffer-alloc-view) //----- //\nfunc.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32() {\n  %c1281_i32 = arith.constant 1281 : i32\n  %c1_i32 = arith.constant 1 : i32\n  %c16_i32 = arith.constant 16 : i32\n  %c1 = arith.constant 1 : index\n  %c0 = arith.constant 0 : index\n  %c32_i64 = arith.constant 32 : i64\n  %0 = hal.interface.constant.load[0] : i32\n  %1 = hal.interface.constant.load[1] : i32\n  %2 = hal.interface.constant.load[2] : i32\n  %3 = hal.interface.constant.load[3] : i32\n  %4 = hal.interface.constant.load[4] : i32\n  %5 = hal.interface.constant.load[5] : i32\n  %6 = hal.interface.constant.load[6] : i32\n  %7 = hal.interface.constant.load[7] : i32\n  %8 = hal.interface.constant.load[8] : i32\n  %9 = hal.interface.constant.load[9] : i32\n  %10 = hal.interface.constant.load[10] : i32\n  %11 = hal.interface.constant.load[11] : i32\n  %12 = hal.interface.constant.load[12] : i32\n  %13 = hal.interface.constant.load[13] : i32\n  %14 = hal.interface.constant.load[14] : i32\n  %15 = hal.interface.constant.load[15] : i32\n  %16 = arith.extui %0 : i32 to i64\n  %17 = arith.extui %1 : i32 to i64\n  %18 = arith.shli %17, %c32_i64 : i64\n  %19 = arith.ori %16, %18 : i64\n  %20 = arith.index_castui %19 : i64 to 
index\n  %21 = arith.extui %2 : i32 to i64\n  %22 = arith.extui %3 : i32 to i64\n  %23 = arith.shli %22, %c32_i64 : i64\n  %24 = arith.ori %21, %23 : i64\n  %25 = arith.index_castui %24 : i64 to index\n  %26 = arith.extui %4 : i32 to i64\n  %27 = arith.extui %5 : i32 to i64\n  %28 = arith.shli %27, %c32_i64 : i64\n  %29 = arith.ori %26, %28 : i64\n  %30 = arith.index_castui %29 : i64 to index\n  %31 = arith.extui %6 : i32 to i64\n  %32 = arith.extui %7 : i32 to i64\n  %33 = arith.shli %32, %c32_i64 : i64\n  %34 = arith.ori %31, %33 : i64\n  %35 = arith.index_castui %34 : i64 to index\n  %36 = arith.extui %8 : i32 to i64\n  %37 = arith.extui %9 : i32 to i64\n  %38 = arith.shli %37, %c32_i64 : i64\n  %39 = arith.ori %36, %38 : i64\n  %40 = arith.index_castui %39 : i64 to index\n  %41 = arith.extui %10 : i32 to i64\n  %42 = arith.extui %11 : i32 to i64\n  %43 = arith.shli %42, %c32_i64 : i64\n  %44 = arith.ori %41, %43 : i64\n  %45 = arith.index_castui %44 : i64 to index\n  %46 = arith.extui %12 : i32 to i64\n  %47 = arith.extui %13 : i32 to i64\n  %48 = arith.shli %47, %c32_i64 : i64\n  %49 = arith.ori %46, %48 : i64\n  %50 = arith.index_castui %49 : i64 to index\n  %51 = arith.extui %14 : i32 to i64\n  %52 = arith.extui %15 : i32 to i64\n  %53 = arith.shli %52, %c32_i64 : i64\n  %54 = arith.ori %51, %53 : i64\n  %55 = arith.index_castui %54 : i64 to index\n  %56 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref&lt;?x?x16x1xf32, #hal.descriptor_type&lt;storage_buffer&gt;&gt;{%30, %35}\n  memref.assume_alignment %56, 64 : memref&lt;?x?x16x1xf32, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n  %57 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%20) flags(ReadOnly) : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;{%40, %45}\n  memref.assume_alignment %57, 1 : memref&lt;?x?x16x1xf32, 
strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n  %58 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%25) : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;{%50, %55}\n  memref.assume_alignment %58, 1 : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n  %workgroup_id_x = hal.interface.workgroup.id[0] : index\n  %workgroup_count_x = hal.interface.workgroup.count[0] : index\n  %workgroup_id_y = hal.interface.workgroup.id[1] : index\n  %workgroup_count_y = hal.interface.workgroup.count[1] : index\n  scf.for %arg0 = %workgroup_id_y to %30 step %workgroup_count_y {\n    %subview = memref.subview %56[%arg0, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32, #hal.descriptor_type&lt;storage_buffer&gt;&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n    scf.for %arg1 = %workgroup_id_x to %40 step %workgroup_count_x {\n      %subview_0 = memref.subview %57[%arg1, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n      %subview_1 = memref.subview %58[%arg0, %arg1, 0, 0] [1, 1, 16, 16] [1, 1, 1, 1] : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt; to memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;\n      iree_codegen.ukernel.generic \"iree_uk_mmt4d\" ins(%subview, %subview_0 : memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;, memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 
1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;) outs(%subview_1 : memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;, #hal.descriptor_type&lt;storage_buffer&gt;&gt;) (%c1, %c1, %35, %c16_i32, %c16_i32, %c1_i32, %c1281_i32 : index, index, index, i32, i32, i32, i32) fn_def_attrs {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"]} strided_outer_dims(1)\n    }\n  }\n  return\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-lowerukernelopstocalls","title":"IR Dump After LowerUKernelOpsToCalls","text":"<pre><code>// -----// IR Dump After LowerUKernelOpsToCalls (iree-codegen-lower-ukernel-ops-to-calls) //----- //\nmodule {\n  func.func private @iree_uk_mmt4d(memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, index, index, index, i32, i32, i32, i32) attributes {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"], llvm.bareptr = true}\n  func.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32() {\n    %c1281_i32 = arith.constant 1281 : i32\n    %c1_i32 = arith.constant 1 : i32\n    %c16_i32 = arith.constant 16 : i32\n    %c1 = arith.constant 1 : index\n    %c0 = arith.constant 0 : index\n    %c32_i64 = arith.constant 32 : i64\n    %0 = hal.interface.constant.load[0] : i32\n    %1 = hal.interface.constant.load[1] : i32\n    %2 = hal.interface.constant.load[2] : i32\n    %3 = hal.interface.constant.load[3] : i32\n    %4 = hal.interface.constant.load[4] : i32\n    %5 = hal.interface.constant.load[5] : i32\n    %6 = hal.interface.constant.load[6] : i32\n    %7 = hal.interface.constant.load[7] : i32\n    %8 = hal.interface.constant.load[8] : i32\n    %9 = hal.interface.constant.load[9] : i32\n    %10 = hal.interface.constant.load[10] : i32\n    %11 = hal.interface.constant.load[11] : i32\n    %12 = 
hal.interface.constant.load[12] : i32\n    %13 = hal.interface.constant.load[13] : i32\n    %14 = hal.interface.constant.load[14] : i32\n    %15 = hal.interface.constant.load[15] : i32\n    %16 = arith.extui %0 : i32 to i64\n    %17 = arith.extui %1 : i32 to i64\n    %18 = arith.shli %17, %c32_i64 : i64\n    %19 = arith.ori %16, %18 : i64\n    %20 = arith.index_castui %19 : i64 to index\n    %21 = arith.extui %2 : i32 to i64\n    %22 = arith.extui %3 : i32 to i64\n    %23 = arith.shli %22, %c32_i64 : i64\n    %24 = arith.ori %21, %23 : i64\n    %25 = arith.index_castui %24 : i64 to index\n    %26 = arith.extui %4 : i32 to i64\n    %27 = arith.extui %5 : i32 to i64\n    %28 = arith.shli %27, %c32_i64 : i64\n    %29 = arith.ori %26, %28 : i64\n    %30 = arith.index_castui %29 : i64 to index\n    %31 = arith.extui %6 : i32 to i64\n    %32 = arith.extui %7 : i32 to i64\n    %33 = arith.shli %32, %c32_i64 : i64\n    %34 = arith.ori %31, %33 : i64\n    %35 = arith.index_castui %34 : i64 to index\n    %36 = arith.extui %8 : i32 to i64\n    %37 = arith.extui %9 : i32 to i64\n    %38 = arith.shli %37, %c32_i64 : i64\n    %39 = arith.ori %36, %38 : i64\n    %40 = arith.index_castui %39 : i64 to index\n    %41 = arith.extui %10 : i32 to i64\n    %42 = arith.extui %11 : i32 to i64\n    %43 = arith.shli %42, %c32_i64 : i64\n    %44 = arith.ori %41, %43 : i64\n    %45 = arith.index_castui %44 : i64 to index\n    %46 = arith.extui %12 : i32 to i64\n    %47 = arith.extui %13 : i32 to i64\n    %48 = arith.shli %47, %c32_i64 : i64\n    %49 = arith.ori %46, %48 : i64\n    %50 = arith.index_castui %49 : i64 to index\n    %51 = arith.extui %14 : i32 to i64\n    %52 = arith.extui %15 : i32 to i64\n    %53 = arith.shli %52, %c32_i64 : i64\n    %54 = arith.ori %51, %53 : i64\n    %55 = arith.index_castui %54 : i64 to index\n    %56 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref&lt;?x?x16x1xf32&gt;{%30, %35}\n    
memref.assume_alignment %56, 64 : memref&lt;?x?x16x1xf32&gt;\n    %57 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%20) flags(ReadOnly) : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;{%40, %45}\n    memref.assume_alignment %57, 1 : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;\n    %58 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%25) : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt;{%50, %55}\n    memref.assume_alignment %58, 1 : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt;\n    %workgroup_id_x = hal.interface.workgroup.id[0] : index\n    %workgroup_count_x = hal.interface.workgroup.count[0] : index\n    %workgroup_id_y = hal.interface.workgroup.id[1] : index\n    %workgroup_count_y = hal.interface.workgroup.count[1] : index\n    scf.for %arg0 = %workgroup_id_y to %30 step %workgroup_count_y {\n      %subview = memref.subview %56[%arg0, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;\n      scf.for %arg1 = %workgroup_id_x to %40 step %workgroup_count_x {\n        %subview_0 = memref.subview %57[%arg1, 0, 0, 0] [1, %35, 16, 1] [1, 1, 1, 1] : memref&lt;?x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt; to memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt;\n        %subview_1 = memref.subview %58[%arg0, %arg1, 0, 0] [1, 1, 16, 16] [1, 1, 1, 1] : memref&lt;?x?x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt; to memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt;\n        %base_buffer, %offset, %sizes:4, %strides:4 = memref.extract_strided_metadata %subview : memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt; -&gt; memref&lt;f32&gt;, index, index, index, index, index, index, index, index, index\n        %base_buffer_2, %offset_3, 
%sizes_4:4, %strides_5:4 = memref.extract_strided_metadata %subview_0 : memref&lt;1x?x16x1xf32, strided&lt;[?, 16, 1, 1], offset: ?&gt;&gt; -&gt; memref&lt;f32&gt;, index, index, index, index, index, index, index, index, index\n        %base_buffer_6, %offset_7, %sizes_8:4, %strides_9:4 = memref.extract_strided_metadata %subview_1 : memref&lt;1x1x16x16xf32, strided&lt;[?, 256, 16, 1], offset: ?&gt;&gt; -&gt; memref&lt;f32&gt;, index, index, index, index, index, index, index, index, index\n        func.call @iree_uk_mmt4d(%base_buffer, %offset, %strides#0, %base_buffer_2, %offset_3, %strides_5#0, %base_buffer_6, %offset_7, %strides_9#0, %c1, %c1, %35, %c16_i32, %c16_i32, %c1_i32, %c1281_i32) : (memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, memref&lt;f32&gt;, index, index, index, index, index, i32, i32, i32, i32) -&gt; ()\n      }\n    }\n    return\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ir-dump-after-converttollvm","title":"IR Dump After ConvertToLLVM","text":"<pre><code>// -----// IR Dump After ConvertToLLVM (iree-convert-to-llvm) //----- //\nmodule attributes {llvm.data_layout = \"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\", llvm.target_triple = \"x86_64-unknown-unknown-eabi-elf\"} {\n  llvm.func @iree_uk_mmt4d(!llvm.ptr) attributes {hal.import.bitcode = true, hal.import.cconv = 1 : i32, hal.import.fields = [\"processor_data\"], llvm.bareptr = true}\n  llvm.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32(%arg0: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias}) -&gt; i32 {\n    %0 = llvm.mlir.constant(4293970975 : i64) : i64\n    %1 = llvm.mlir.constant(8 : i64) : i64\n    %2 = llvm.mlir.constant(0 : i32) : i32\n    %3 = llvm.mlir.constant(256 : index) : i64\n    %4 = llvm.mlir.constant(-1 : index) : 
i64\n    %5 = llvm.mlir.constant(4 : index) : i64\n    %6 = llvm.mlir.constant(16 : index) : i64\n    %7 = llvm.mlir.constant(0 : index) : i64\n    %8 = llvm.mlir.constant(1281 : i32) : i32\n    %9 = llvm.mlir.constant(1 : i32) : i32\n    %10 = llvm.mlir.constant(16 : i32) : i32\n    %11 = llvm.mlir.constant(1 : index) : i64\n    %12 = llvm.mlir.constant(32 : i64) : i64\n    %13 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %14 = llvm.extractvalue %13[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %15 = llvm.load %14 : !llvm.ptr -&gt; i32\n    %16 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %17 = llvm.extractvalue %16[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %18 = llvm.getelementptr %17[1] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %19 = llvm.load %18 : !llvm.ptr -&gt; i32\n    %20 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %21 = llvm.extractvalue %20[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %22 = llvm.getelementptr %21[2] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %23 = llvm.load %22 : !llvm.ptr -&gt; i32\n    %24 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %25 = llvm.extractvalue %24[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %26 = 
llvm.getelementptr %25[3] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %27 = llvm.load %26 : !llvm.ptr -&gt; i32\n    %28 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %29 = llvm.extractvalue %28[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %30 = llvm.getelementptr %29[4] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %31 = llvm.load %30 : !llvm.ptr -&gt; i32\n    %32 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %33 = llvm.extractvalue %32[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %34 = llvm.getelementptr %33[5] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %35 = llvm.load %34 : !llvm.ptr -&gt; i32\n    %36 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %37 = llvm.extractvalue %36[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %38 = llvm.getelementptr %37[6] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %39 = llvm.load %38 : !llvm.ptr -&gt; i32\n    %40 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %41 = llvm.extractvalue %40[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %42 = llvm.getelementptr %41[7] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %43 = llvm.load %42 : !llvm.ptr -&gt; i32\n    %44 = llvm.load %arg1 : !llvm.ptr -&gt; 
!llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %45 = llvm.extractvalue %44[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %46 = llvm.getelementptr %45[8] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %47 = llvm.load %46 : !llvm.ptr -&gt; i32\n    %48 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %49 = llvm.extractvalue %48[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %50 = llvm.getelementptr %49[9] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %51 = llvm.load %50 : !llvm.ptr -&gt; i32\n    %52 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %53 = llvm.extractvalue %52[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %54 = llvm.getelementptr %53[10] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %55 = llvm.load %54 : !llvm.ptr -&gt; i32\n    %56 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %57 = llvm.extractvalue %56[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %58 = llvm.getelementptr %57[11] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %59 = llvm.load %58 : !llvm.ptr -&gt; i32\n    %60 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %61 = llvm.extractvalue %60[9] : 
!llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %62 = llvm.getelementptr %61[14] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %63 = llvm.load %62 : !llvm.ptr -&gt; i32\n    %64 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %65 = llvm.extractvalue %64[9] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %66 = llvm.getelementptr %65[15] : (!llvm.ptr) -&gt; !llvm.ptr, i32\n    %67 = llvm.load %66 : !llvm.ptr -&gt; i32\n    %68 = llvm.zext %15 : i32 to i64\n    %69 = llvm.zext %19 : i32 to i64\n    %70 = llvm.shl %69, %12  : i64\n    %71 = llvm.or %68, %70  : i64\n    %72 = llvm.zext %23 : i32 to i64\n    %73 = llvm.zext %27 : i32 to i64\n    %74 = llvm.shl %73, %12  : i64\n    %75 = llvm.or %72, %74  : i64\n    %76 = llvm.zext %31 : i32 to i64\n    %77 = llvm.zext %35 : i32 to i64\n    %78 = llvm.shl %77, %12  : i64\n    %79 = llvm.or %76, %78  : i64\n    %80 = llvm.zext %39 : i32 to i64\n    %81 = llvm.zext %43 : i32 to i64\n    %82 = llvm.shl %81, %12  : i64\n    %83 = llvm.or %80, %82  : i64\n    %84 = llvm.zext %47 : i32 to i64\n    %85 = llvm.zext %51 : i32 to i64\n    %86 = llvm.shl %85, %12  : i64\n    %87 = llvm.or %84, %86  : i64\n    %88 = llvm.zext %55 : i32 to i64\n    %89 = llvm.zext %59 : i32 to i64\n    %90 = llvm.shl %89, %12  : i64\n    %91 = llvm.or %88, %90  : i64\n    %92 = llvm.zext %63 : i32 to i64\n    %93 = llvm.zext %67 : i32 to i64\n    %94 = llvm.shl %93, %12  : i64\n    %95 = llvm.or %92, %94  : i64\n    %96 = llvm.mul %83, %6  : i64\n    %97 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %98 = llvm.extractvalue %97[10] : 
!llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %99 = llvm.load %98 : !llvm.ptr -&gt; !llvm.ptr\n    %100 = llvm.mul %91, %6  : i64\n    %101 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %102 = llvm.extractvalue %101[10] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %103 = llvm.load %102 : !llvm.ptr -&gt; !llvm.ptr\n    %104 = llvm.mul %95, %3  : i64\n    %105 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %106 = llvm.extractvalue %105[10] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %107 = llvm.getelementptr %106[1] : (!llvm.ptr) -&gt; !llvm.ptr, !llvm.ptr\n    %108 = llvm.load %107 : !llvm.ptr -&gt; !llvm.ptr\n    %109 = llvm.load %arg2 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %110 = llvm.extractvalue %109[0] : !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %111 = llvm.zext %110 : i32 to i64\n    %112 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %113 = llvm.extractvalue %112[4] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %114 = llvm.zext %113 : i32 to i64\n    %115 = llvm.load %arg2 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %116 = llvm.extractvalue %115[1] 
: !llvm.struct&lt;\"iree_hal_executable_workgroup_state_v0_t\", (i32, i32, i16, i16, i32, ptr, i32)&gt;\n    %117 = llvm.zext %116 : i32 to i64\n    %118 = llvm.load %arg1 : !llvm.ptr -&gt; !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %119 = llvm.extractvalue %118[5] : !llvm.struct&lt;\"iree_hal_executable_dispatch_state_v0_t\", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)&gt;\n    %120 = llvm.zext %119 : i32 to i64\n    llvm.br ^bb1(%117 : i64)\n  ^bb1(%121: i64):  // 2 preds: ^bb0, ^bb4\n    %122 = llvm.icmp \"slt\" %121, %79 : i64\n    llvm.cond_br %122, ^bb2(%111 : i64), ^bb5\n  ^bb2(%123: i64):  // 2 preds: ^bb1, ^bb3\n    %124 = llvm.icmp \"slt\" %123, %87 : i64\n    llvm.cond_br %124, ^bb3, ^bb4\n  ^bb3:  // pred: ^bb2\n    %125 = llvm.mul %83, %6  : i64\n    %126 = llvm.mul %121, %125  : i64\n    %127 = llvm.icmp \"slt\" %71, %7 : i64\n    %128 = llvm.sub %4, %71  : i64\n    %129 = llvm.select %127, %128, %71 : i1, i64\n    %130 = llvm.sdiv %129, %5  : i64\n    %131 = llvm.sub %4, %130  : i64\n    %132 = llvm.select %127, %131, %130 : i1, i64\n    %133 = llvm.mul %91, %6  : i64\n    %134 = llvm.mul %123, %133  : i64\n    %135 = llvm.add %132, %134  : i64\n    %136 = llvm.mul %123, %3  : i64\n    %137 = llvm.icmp \"slt\" %75, %7 : i64\n    %138 = llvm.sub %4, %75  : i64\n    %139 = llvm.select %137, %138, %75 : i1, i64\n    %140 = llvm.sdiv %139, %5  : i64\n    %141 = llvm.sub %4, %140  : i64\n    %142 = llvm.select %137, %141, %140 : i1, i64\n    %143 = llvm.add %136, %142  : i64\n    %144 = llvm.mul %95, %3  : i64\n    %145 = llvm.mul %121, %144  : i64\n    %146 = llvm.add %143, %145  : i64\n    %147 = llvm.getelementptr inbounds %arg0[4] : (!llvm.ptr) -&gt; !llvm.ptr, !llvm.ptr\n    %148 = llvm.alloca %1 x i64 {alignment = 8 : i64} : (i64) -&gt; !llvm.ptr\n    %149 = llvm.load %147 : !llvm.ptr -&gt; i64\n    %150 = llvm.or %149, %0  : i64\n    llvm.store 
%150, %148 : i64, !llvm.ptr\n    %151 = llvm.getelementptr inbounds %147[1] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %152 = llvm.load %151 : !llvm.ptr -&gt; i64\n    %153 = llvm.getelementptr inbounds %148[1] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %152, %153 : i64, !llvm.ptr\n    %154 = llvm.getelementptr inbounds %147[2] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %155 = llvm.load %154 : !llvm.ptr -&gt; i64\n    %156 = llvm.getelementptr inbounds %148[2] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %155, %156 : i64, !llvm.ptr\n    %157 = llvm.getelementptr inbounds %147[3] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %158 = llvm.load %157 : !llvm.ptr -&gt; i64\n    %159 = llvm.getelementptr inbounds %148[3] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %158, %159 : i64, !llvm.ptr\n    %160 = llvm.getelementptr inbounds %147[4] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %161 = llvm.load %160 : !llvm.ptr -&gt; i64\n    %162 = llvm.getelementptr inbounds %148[4] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %161, %162 : i64, !llvm.ptr\n    %163 = llvm.getelementptr inbounds %147[5] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %164 = llvm.load %163 : !llvm.ptr -&gt; i64\n    %165 = llvm.getelementptr inbounds %148[5] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %164, %165 : i64, !llvm.ptr\n    %166 = llvm.getelementptr inbounds %147[6] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %167 = llvm.load %166 : !llvm.ptr -&gt; i64\n    %168 = llvm.getelementptr inbounds %148[6] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %167, %168 : i64, !llvm.ptr\n    %169 = llvm.getelementptr inbounds %147[7] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    %170 = llvm.load %169 : !llvm.ptr -&gt; i64\n    %171 = llvm.getelementptr inbounds %148[7] : (!llvm.ptr) -&gt; !llvm.ptr, i64\n    llvm.store %170, %171 : i64, !llvm.ptr\n    %172 = llvm.alloca %11 x !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt; : (i64) -&gt; 
!llvm.ptr\n    %173 = llvm.mlir.undef : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %174 = llvm.insertvalue %99, %173[0] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %175 = llvm.insertvalue %126, %174[1] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %176 = llvm.insertvalue %96, %175[2] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %177 = llvm.insertvalue %103, %176[3] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %178 = llvm.insertvalue %135, %177[4] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %179 = llvm.insertvalue %100, %178[5] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %180 = llvm.insertvalue %108, %179[6] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %181 = llvm.insertvalue %146, %180[7] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %182 = llvm.insertvalue %104, %181[8] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %183 = llvm.insertvalue %11, %182[9] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %184 = llvm.insertvalue %11, %183[10] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %185 = llvm.insertvalue %83, %184[11] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %186 = llvm.insertvalue %10, %185[12] : 
!llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %187 = llvm.insertvalue %10, %186[13] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %188 = llvm.insertvalue %9, %187[14] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %189 = llvm.insertvalue %8, %188[15] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    %190 = llvm.insertvalue %148, %189[16] : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;\n    llvm.store %190, %172 : !llvm.struct&lt;(ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr)&gt;, !llvm.ptr\n    llvm.call @iree_uk_mmt4d(%172) : (!llvm.ptr) -&gt; ()\n    %191 = llvm.add %123, %114  : i64\n    llvm.br ^bb2(%191 : i64)\n  ^bb4:  // pred: ^bb2\n    %192 = llvm.add %121, %120  : i64\n    llvm.br ^bb1(%192 : i64)\n  ^bb5:  // pred: ^bb1\n    llvm.return %2 : i32\n  }\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#intermediate-file-codegenbc-disassembled-to-codegenll","title":"Intermediate file: <code>...codegen.bc</code>, disassembled to <code>...codegen.ll</code>","text":"<pre><code>define internal i32 @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !90 {\n  %4 = load %iree_hal_executable_dispatch_state_v0_t.7, ptr %1, align 8, !dbg !91\n  %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 9, !dbg !91\n  %6 = load i32, ptr %5, align 4, !dbg !91\n  %7 = getelementptr i32, ptr %5, i32 1, !dbg !91\n  %8 = load i32, ptr %7, align 4, !dbg !91\n  %9 = getelementptr i32, ptr %5, i32 2, !dbg !91\n  %10 = load i32, ptr %9, align 4, 
!dbg !91\n  %11 = getelementptr i32, ptr %5, i32 3, !dbg !91\n  %12 = load i32, ptr %11, align 4, !dbg !91\n  %13 = getelementptr i32, ptr %5, i32 4, !dbg !91\n  %14 = load i32, ptr %13, align 4, !dbg !91\n  %15 = getelementptr i32, ptr %5, i32 5, !dbg !91\n  %16 = load i32, ptr %15, align 4, !dbg !91\n  %17 = getelementptr i32, ptr %5, i32 6, !dbg !91\n  %18 = load i32, ptr %17, align 4, !dbg !91\n  %19 = getelementptr i32, ptr %5, i32 7, !dbg !91\n  %20 = load i32, ptr %19, align 4, !dbg !91\n  %21 = getelementptr i32, ptr %5, i32 8, !dbg !91\n  %22 = load i32, ptr %21, align 4, !dbg !91\n  %23 = getelementptr i32, ptr %5, i32 9, !dbg !91\n  %24 = load i32, ptr %23, align 4, !dbg !91\n  %25 = getelementptr i32, ptr %5, i32 10, !dbg !91\n  %26 = load i32, ptr %25, align 4, !dbg !91\n  %27 = getelementptr i32, ptr %5, i32 11, !dbg !91\n  %28 = load i32, ptr %27, align 4, !dbg !91\n  %29 = getelementptr i32, ptr %5, i32 14, !dbg !91\n  %30 = load i32, ptr %29, align 4, !dbg !91\n  %31 = getelementptr i32, ptr %5, i32 15, !dbg !91\n  %32 = load i32, ptr %31, align 4, !dbg !91\n  %33 = zext i32 %6 to i64, !dbg !91\n  %34 = zext i32 %8 to i64, !dbg !91\n  %35 = shl i64 %34, 32, !dbg !91\n  %36 = or i64 %33, %35, !dbg !91\n  %37 = zext i32 %10 to i64, !dbg !91\n  %38 = zext i32 %12 to i64, !dbg !91\n  %39 = shl i64 %38, 32, !dbg !91\n  %40 = or i64 %37, %39, !dbg !91\n  %41 = zext i32 %14 to i64, !dbg !91\n  %42 = zext i32 %16 to i64, !dbg !91\n  %43 = shl i64 %42, 32, !dbg !91\n  %44 = or i64 %41, %43, !dbg !91\n  %45 = zext i32 %18 to i64, !dbg !91\n  %46 = zext i32 %20 to i64, !dbg !91\n  %47 = shl i64 %46, 32, !dbg !91\n  %48 = or i64 %45, %47, !dbg !91\n  %49 = zext i32 %22 to i64, !dbg !91\n  %50 = zext i32 %24 to i64, !dbg !91\n  %51 = shl i64 %50, 32, !dbg !91\n  %52 = or i64 %49, %51, !dbg !91\n  %53 = zext i32 %26 to i64, !dbg !91\n  %54 = zext i32 %28 to i64, !dbg !91\n  %55 = shl i64 %54, 32, !dbg !91\n  %56 = or i64 %53, %55, !dbg !91\n  %57 = zext i32 %30 
to i64, !dbg !91\n  %58 = zext i32 %32 to i64, !dbg !91\n  %59 = shl i64 %58, 32, !dbg !91\n  %60 = or i64 %57, %59, !dbg !91\n  %61 = mul i64 %48, 16, !dbg !91\n  %62 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 10, !dbg !91\n  %63 = load ptr, ptr %62, align 8, !dbg !91\n  %64 = mul i64 %56, 16, !dbg !91\n  %65 = mul i64 %60, 256, !dbg !91\n  %66 = getelementptr ptr, ptr %62, i32 1, !dbg !91\n  %67 = load ptr, ptr %66, align 8, !dbg !91\n  %68 = load %iree_hal_executable_workgroup_state_v0_t.8, ptr %2, align 8, !dbg !91\n  %69 = extractvalue %iree_hal_executable_workgroup_state_v0_t.8 %68, 0, !dbg !91\n  %70 = zext i32 %69 to i64, !dbg !91\n  %71 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 4, !dbg !91\n  %72 = zext i32 %71 to i64, !dbg !91\n  %73 = extractvalue %iree_hal_executable_workgroup_state_v0_t.8 %68, 1, !dbg !91\n  %74 = zext i32 %73 to i64, !dbg !91\n  %75 = extractvalue %iree_hal_executable_dispatch_state_v0_t.7 %4, 5, !dbg !91\n  %76 = zext i32 %75 to i64, !dbg !91\n  br label %77, !dbg !91\n\n77:                                               ; preds = %147, %3\n  %78 = phi i64 [ %148, %147 ], [ %74, %3 ]\n  %79 = icmp slt i64 %78, %44, !dbg !91\n  br i1 %79, label %80, label %149, !dbg !91\n\n80:                                               ; preds = %83, %77\n  %81 = phi i64 [ %146, %83 ], [ %70, %77 ]\n  %82 = icmp slt i64 %81, %52, !dbg !91\n  br i1 %82, label %83, label %147, !dbg !91\n\n83:                                               ; preds = %80\n  %84 = mul i64 %78, %61, !dbg !91\n  %85 = icmp slt i64 %36, 0, !dbg !91\n  %86 = sub i64 -1, %36, !dbg !91\n  %87 = select i1 %85, i64 %86, i64 %36, !dbg !91\n  %88 = sdiv i64 %87, 4, !dbg !91\n  %89 = sub i64 -1, %88, !dbg !91\n  %90 = select i1 %85, i64 %89, i64 %88, !dbg !91\n  %91 = mul i64 %81, %64, !dbg !91\n  %92 = add i64 %90, %91, !dbg !91\n  %93 = mul i64 %81, 256, !dbg !91\n  %94 = icmp slt i64 %40, 0, !dbg !91\n  %95 = sub i64 -1, %40, !dbg !91\n  %96 
= select i1 %94, i64 %95, i64 %40, !dbg !91\n  %97 = sdiv i64 %96, 4, !dbg !91\n  %98 = sub i64 -1, %97, !dbg !91\n  %99 = select i1 %94, i64 %98, i64 %97, !dbg !91\n  %100 = add i64 %93, %99, !dbg !91\n  %101 = mul i64 %78, %65, !dbg !91\n  %102 = add i64 %100, %101, !dbg !91\n  %103 = getelementptr inbounds ptr, ptr %0, i32 4, !dbg !91\n  %104 = alloca i64, i64 8, align 8, !dbg !91\n  %105 = load i64, ptr %103, align 4, !dbg !91\n  %106 = or i64 %105, 4293970975, !dbg !91\n  store i64 %106, ptr %104, align 4, !dbg !91\n  %107 = getelementptr inbounds i64, ptr %103, i32 1, !dbg !91\n  %108 = load i64, ptr %107, align 4, !dbg !91\n  %109 = getelementptr inbounds i64, ptr %104, i32 1, !dbg !91\n  store i64 %108, ptr %109, align 4, !dbg !91\n  %110 = getelementptr inbounds i64, ptr %103, i32 2, !dbg !91\n  %111 = load i64, ptr %110, align 4, !dbg !91\n  %112 = getelementptr inbounds i64, ptr %104, i32 2, !dbg !91\n  store i64 %111, ptr %112, align 4, !dbg !91\n  %113 = getelementptr inbounds i64, ptr %103, i32 3, !dbg !91\n  %114 = load i64, ptr %113, align 4, !dbg !91\n  %115 = getelementptr inbounds i64, ptr %104, i32 3, !dbg !91\n  store i64 %114, ptr %115, align 4, !dbg !91\n  %116 = getelementptr inbounds i64, ptr %103, i32 4, !dbg !91\n  %117 = load i64, ptr %116, align 4, !dbg !91\n  %118 = getelementptr inbounds i64, ptr %104, i32 4, !dbg !91\n  store i64 %117, ptr %118, align 4, !dbg !91\n  %119 = getelementptr inbounds i64, ptr %103, i32 5, !dbg !91\n  %120 = load i64, ptr %119, align 4, !dbg !91\n  %121 = getelementptr inbounds i64, ptr %104, i32 5, !dbg !91\n  store i64 %120, ptr %121, align 4, !dbg !91\n  %122 = getelementptr inbounds i64, ptr %103, i32 6, !dbg !91\n  %123 = load i64, ptr %122, align 4, !dbg !91\n  %124 = getelementptr inbounds i64, ptr %104, i32 6, !dbg !91\n  store i64 %123, ptr %124, align 4, !dbg !91\n  %125 = getelementptr inbounds i64, ptr %103, i32 7, !dbg !91\n  %126 = load i64, ptr %125, align 4, !dbg !91\n  %127 = getelementptr 
inbounds i64, ptr %104, i32 7, !dbg !91\n  store i64 %126, ptr %127, align 4, !dbg !91\n  %128 = alloca { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr }, i64 1, align 8, !dbg !91\n  %129 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } undef, ptr %63, 0, !dbg !91\n  %130 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %129, i64 %84, 1, !dbg !91\n  %131 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %130, i64 %61, 2, !dbg !91\n  %132 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %131, ptr %63, 3, !dbg !91\n  %133 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %132, i64 %92, 4, !dbg !91\n  %134 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %133, i64 %64, 5, !dbg !91\n  %135 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %134, ptr %67, 6, !dbg !91\n  %136 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %135, i64 %102, 7, !dbg !91\n  %137 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %136, i64 %65, 8, !dbg !91\n  %138 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %137, i64 1, 9, !dbg !91\n  %139 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %138, i64 1, 10, !dbg !91\n  %140 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %139, i64 %48, 11, !dbg !91\n  %141 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %140, i32 16, 12, !dbg 
!91\n  %142 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %141, i32 16, 13, !dbg !91\n  %143 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %142, i32 1, 14, !dbg !91\n  %144 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %143, i32 1281, 15, !dbg !91\n  %145 = insertvalue { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %144, ptr %104, 16, !dbg !91\n  store { ptr, i64, i64, ptr, i64, i64, ptr, i64, i64, i64, i64, i64, i32, i32, i32, i32, ptr } %145, ptr %128, align 8, !dbg !91\n  call void @iree_uk_mmt4d(ptr %128), !dbg !91\n  %146 = add i64 %81, %72, !dbg !91\n  br label %80, !dbg !91\n\n147:                                              ; preds = %80\n  %148 = add i64 %78, %76, !dbg !91\n  br label %77, !dbg !91\n\n149:                                              ; preds = %77\n  ret i32 0, !dbg !91\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ukernel-bitcode-entry-point","title":"Ukernel bitcode: entry point","text":"<pre><code>; Function Attrs: nounwind\ndefine dso_local noundef i32 @iree_uk_mmt4d(ptr noundef %0) local_unnamed_addr #10 {\n  %2 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 9\n  %3 = load i64, ptr %2, align 8, !tbaa !1001\n  %4 = icmp eq i64 %3, 0\n  br i1 %4, label %133, label %5\n\n5:                                                ; preds = %1\n  %6 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 10\n  %7 = load i64, ptr %6, align 8, !tbaa !1002\n  %8 = icmp eq i64 %7, 0\n  br i1 %8, label %133, label %9\n\n9:                                                ; preds = %5\n  %10 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 11\n  %11 = load i64, ptr %10, align 8, !tbaa !19\n  %12 = 
icmp eq i64 %11, 0\n  br i1 %12, label %13, label %18\n\n13:                                               ; preds = %9\n  %14 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 15\n  %15 = load i32, ptr %14, align 4, !tbaa !9\n  %16 = and i32 %15, 256\n  %17 = icmp eq i32 %16, 0\n  br i1 %17, label %18, label %133\n\n18:                                               ; preds = %13, %9\n  %19 = tail call ptr @iree_uk_mmt4d_select_tile_func(ptr noundef nonnull %0) #14\n  %20 = load i64, ptr %2, align 8, !tbaa !1001\n  %21 = trunc i64 %20 to i32\n  %22 = load i64, ptr %6, align 8, !tbaa !1002\n  %23 = trunc i64 %22 to i32\n  %24 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 15\n  %25 = load i32, ptr %24, align 4, !tbaa !9\n  %26 = zext i32 %25 to i64\n  %27 = shl i64 %26, 56\n  %28 = add i64 %27, -72057594037927936\n  %29 = ashr exact i64 %28, 56\n  %30 = getelementptr inbounds [9 x i32], ptr @switch.table.iree_uk_mmt4d, i64 0, i64 %29\n  %31 = load i32, ptr %30, align 4\n  %32 = lshr i32 %31, 8\n  %33 = and i32 %31, 7\n  %34 = and i32 %32, 7\n  %35 = and i32 %31, 327680\n  %36 = add nsw i32 %35, -196608\n  %37 = lshr exact i32 %36, 16\n  %38 = zext nneg i32 %37 to i64\n  %39 = zext nneg i32 %33 to i64\n  %40 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 3\n  %41 = load ptr, ptr %40, align 8, !tbaa !1003\n  %42 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 4\n  %43 = load i64, ptr %42, align 8, !tbaa !1004\n  %44 = zext nneg i32 %34 to i64\n  %45 = shl i64 %43, %44\n  %46 = sdiv i64 %45, 8\n  %47 = getelementptr inbounds i8, ptr %41, i64 %46\n  %48 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 2\n  %49 = load i64, ptr %48, align 8, !tbaa !1005\n  %50 = shl i64 %49, %39\n  %51 = sdiv i64 %50, 8\n  %52 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 5\n  %53 = load i64, ptr %52, align 8, !tbaa 
!1006\n  %54 = shl i64 %53, %44\n  %55 = sdiv i64 %54, 8\n  %56 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 8\n  %57 = load i64, ptr %56, align 8, !tbaa !1007\n  %58 = shl i64 %57, %38\n  %59 = icmp sgt i32 %21, 0\n  br i1 %59, label %60, label %133\n\n60:                                               ; preds = %18\n  %61 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 13\n  %62 = load i32, ptr %61, align 4, !tbaa !996\n  %63 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 12\n  %64 = load i32, ptr %63, align 8, !tbaa !1000\n  %65 = shl i32 %62, 16\n  %66 = ashr exact i32 %65, 16\n  %67 = shl i32 %64, 16\n  %68 = ashr exact i32 %67, 16\n  %69 = mul nsw i32 %66, %68\n  %70 = shl i32 %69, %37\n  %71 = load ptr, ptr %0, align 8, !tbaa !1008\n  %72 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 1\n  %73 = load i64, ptr %72, align 8, !tbaa !1009\n  %74 = shl i64 %73, %39\n  %75 = sdiv i64 %74, 8\n  %76 = getelementptr inbounds i8, ptr %71, i64 %75\n  %77 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 6\n  %78 = load ptr, ptr %77, align 8, !tbaa !1010\n  %79 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %0, i64 0, i32 7\n  %80 = load i64, ptr %79, align 8, !tbaa !1011\n  %81 = shl i64 %80, %38\n  %82 = getelementptr inbounds i8, ptr %78, i64 %81\n  %83 = icmp sgt i32 %23, 0\n  %84 = sext i32 %70 to i64\n  br i1 %83, label %90, label %85\n\n85:                                               ; preds = %60\n  %86 = and i32 %21, 3\n  %87 = icmp ult i32 %21, 4\n  br i1 %87, label %121, label %88\n\n88:                                               ; preds = %85\n  %89 = and i32 %21, 2147483644\n  br label %107\n\n90:                                               ; preds = %60, %102\n  %91 = phi i32 [ %105, %102 ], [ 0, %60 ]\n  %92 = phi ptr [ %103, %102 ], [ %82, %60 ]\n  %93 = phi ptr [ %104, %102 ], [ %76, %60 ]\n  
tail call void @llvm.prefetch.p0(ptr %92, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %93, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  br label %94\n\n94:                                               ; preds = %94, %90\n  %95 = phi i32 [ 0, %90 ], [ %100, %94 ]\n  %96 = phi ptr [ %47, %90 ], [ %99, %94 ]\n  %97 = phi ptr [ %92, %90 ], [ %98, %94 ]\n  tail call void %19(ptr noundef %97, ptr noundef %93, ptr noundef %96, ptr noundef nonnull %0) #14\n  %98 = getelementptr inbounds i8, ptr %97, i64 %84\n  %99 = getelementptr inbounds i8, ptr %96, i64 %55\n  %100 = add nuw nsw i32 %95, 1\n  %101 = icmp eq i32 %100, %23\n  br i1 %101, label %102, label %94, !llvm.loop !1012\n\n102:                                              ; preds = %94\n  %103 = getelementptr inbounds i8, ptr %92, i64 %58\n  %104 = getelementptr inbounds i8, ptr %93, i64 %51\n  %105 = add nuw nsw i32 %91, 1\n  %106 = icmp eq i32 %105, %21\n  br i1 %106, label %133, label %90, !llvm.loop !1013\n\n107:                                              ; preds = %107, %88\n  %108 = phi ptr [ %82, %88 ], [ %117, %107 ]\n  %109 = phi ptr [ %76, %88 ], [ %118, %107 ]\n  %110 = phi i32 [ 0, %88 ], [ %119, %107 ]\n  tail call void @llvm.prefetch.p0(ptr %108, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %109, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %111 = getelementptr inbounds i8, ptr %108, i64 %58\n  %112 = getelementptr inbounds i8, ptr %109, i64 %51\n  tail call void @llvm.prefetch.p0(ptr %111, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %112, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %113 = getelementptr inbounds i8, ptr %111, i64 %58\n  %114 = getelementptr inbounds i8, ptr %112, i64 %51\n  tail call void @llvm.prefetch.p0(ptr %113, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %114, i32 0, i32 3, i32 
1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %115 = getelementptr inbounds i8, ptr %113, i64 %58\n  %116 = getelementptr inbounds i8, ptr %114, i64 %51\n  tail call void @llvm.prefetch.p0(ptr %115, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %116, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %117 = getelementptr inbounds i8, ptr %115, i64 %58\n  %118 = getelementptr inbounds i8, ptr %116, i64 %51\n  %119 = add i32 %110, 4\n  %120 = icmp eq i32 %119, %89\n  br i1 %120, label %121, label %107, !llvm.loop !1013\n\n121:                                              ; preds = %107, %85\n  %122 = phi ptr [ %82, %85 ], [ %117, %107 ]\n  %123 = phi ptr [ %76, %85 ], [ %118, %107 ]\n  %124 = icmp eq i32 %86, 0\n  br i1 %124, label %133, label %125\n\n125:                                              ; preds = %121, %125\n  %126 = phi ptr [ %129, %125 ], [ %122, %121 ]\n  %127 = phi ptr [ %130, %125 ], [ %123, %121 ]\n  %128 = phi i32 [ %131, %125 ], [ 0, %121 ]\n  tail call void @llvm.prefetch.p0(ptr %126, i32 1, i32 1, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %127, i32 0, i32 3, i32 1)\n  tail call void @llvm.prefetch.p0(ptr %47, i32 0, i32 3, i32 1)\n  %129 = getelementptr inbounds i8, ptr %126, i64 %58\n  %130 = getelementptr inbounds i8, ptr %127, i64 %51\n  %131 = add i32 %128, 1\n  %132 = icmp eq i32 %131, %86\n  br i1 %132, label %133, label %125, !llvm.loop !1014\n\n133:                                              ; preds = %121, %125, %102, %1, %5, %13, %18\n  ret i32 0\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#ukernel-bitcode-tile-function","title":"Ukernel bitcode: tile function","text":"<pre><code>; Function Attrs: nofree norecurse nosync nounwind memory(read, argmem: readwrite, inaccessiblemem: readwrite)\ndefine dso_local void @iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base(ptr 
noalias nocapture noundef %0, ptr noalias nocapture noundef readonly %1, ptr noalias nocapture noundef readonly %2, ptr nocapture noundef readonly %3) #4 {\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !367)\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !370)\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !372)\n  tail call void @llvm.prefetch.p0(ptr %1, i32 0, i32 3, i32 1), !noalias !374\n  tail call void @llvm.prefetch.p0(ptr %2, i32 0, i32 3, i32 1), !noalias !375\n  %5 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %3, i64 0, i32 15\n  %6 = load i32, ptr %5, align 4, !tbaa !9, !noalias !376\n  %7 = and i32 %6, 256\n  %8 = icmp eq i32 %7, 0\n  br i1 %8, label %41, label %9\n\n9:                                                ; preds = %4\n  %10 = load &lt;16 x float&gt;, ptr %0, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %11 = getelementptr inbounds float, ptr %0, i64 16\n  %12 = load &lt;16 x float&gt;, ptr %11, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %13 = getelementptr inbounds float, ptr %0, i64 32\n  %14 = load &lt;16 x float&gt;, ptr %13, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %15 = getelementptr inbounds float, ptr %0, i64 48\n  %16 = load &lt;16 x float&gt;, ptr %15, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %17 = getelementptr inbounds float, ptr %0, i64 64\n  %18 = load &lt;16 x float&gt;, ptr %17, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %19 = getelementptr inbounds float, ptr %0, i64 80\n  %20 = load &lt;16 x float&gt;, ptr %19, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %21 = getelementptr inbounds float, ptr %0, i64 96\n  %22 = load &lt;16 x float&gt;, ptr %21, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %23 = getelementptr inbounds float, ptr %0, i64 112\n  %24 = load &lt;16 x float&gt;, ptr %23, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %25 = getelementptr inbounds 
float, ptr %0, i64 128\n  %26 = load &lt;16 x float&gt;, ptr %25, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %27 = getelementptr inbounds float, ptr %0, i64 144\n  %28 = load &lt;16 x float&gt;, ptr %27, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %29 = getelementptr inbounds float, ptr %0, i64 160\n  %30 = load &lt;16 x float&gt;, ptr %29, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %31 = getelementptr inbounds float, ptr %0, i64 176\n  %32 = load &lt;16 x float&gt;, ptr %31, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %33 = getelementptr inbounds float, ptr %0, i64 192\n  %34 = load &lt;16 x float&gt;, ptr %33, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %35 = getelementptr inbounds float, ptr %0, i64 208\n  %36 = load &lt;16 x float&gt;, ptr %35, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %37 = getelementptr inbounds float, ptr %0, i64 224\n  %38 = load &lt;16 x float&gt;, ptr %37, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %39 = getelementptr inbounds float, ptr %0, i64 240\n  %40 = load &lt;16 x float&gt;, ptr %39, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  br label %41\n\n41:                                               ; preds = %4, %9\n  %42 = phi &lt;16 x float&gt; [ %40, %9 ], [ zeroinitializer, %4 ]\n  %43 = phi &lt;16 x float&gt; [ %38, %9 ], [ zeroinitializer, %4 ]\n  %44 = phi &lt;16 x float&gt; [ %36, %9 ], [ zeroinitializer, %4 ]\n  %45 = phi &lt;16 x float&gt; [ %34, %9 ], [ zeroinitializer, %4 ]\n  %46 = phi &lt;16 x float&gt; [ %32, %9 ], [ zeroinitializer, %4 ]\n  %47 = phi &lt;16 x float&gt; [ %30, %9 ], [ zeroinitializer, %4 ]\n  %48 = phi &lt;16 x float&gt; [ %28, %9 ], [ zeroinitializer, %4 ]\n  %49 = phi &lt;16 x float&gt; [ %26, %9 ], [ zeroinitializer, %4 ]\n  %50 = phi &lt;16 x float&gt; [ %24, %9 ], [ zeroinitializer, %4 ]\n  %51 = phi &lt;16 x float&gt; [ %22, %9 ], [ zeroinitializer, %4 ]\n  %52 = phi &lt;16 x float&gt; [ %20, %9 ], [ 
zeroinitializer, %4 ]\n  %53 = phi &lt;16 x float&gt; [ %18, %9 ], [ zeroinitializer, %4 ]\n  %54 = phi &lt;16 x float&gt; [ %16, %9 ], [ zeroinitializer, %4 ]\n  %55 = phi &lt;16 x float&gt; [ %14, %9 ], [ zeroinitializer, %4 ]\n  %56 = phi &lt;16 x float&gt; [ %12, %9 ], [ zeroinitializer, %4 ]\n  %57 = phi &lt;16 x float&gt; [ %10, %9 ], [ zeroinitializer, %4 ]\n  %58 = getelementptr inbounds %struct.iree_uk_mmt4d_params_t, ptr %3, i64 0, i32 11\n  %59 = load i64, ptr %58, align 8, !tbaa !19, !noalias !376\n  %60 = icmp sgt i64 %59, 0\n  br i1 %60, label %61, label %167\n\n61:                                               ; preds = %41, %61\n  %62 = phi &lt;16 x float&gt; [ %161, %61 ], [ %42, %41 ]\n  %63 = phi &lt;16 x float&gt; [ %156, %61 ], [ %43, %41 ]\n  %64 = phi &lt;16 x float&gt; [ %151, %61 ], [ %44, %41 ]\n  %65 = phi &lt;16 x float&gt; [ %146, %61 ], [ %45, %41 ]\n  %66 = phi &lt;16 x float&gt; [ %141, %61 ], [ %46, %41 ]\n  %67 = phi &lt;16 x float&gt; [ %136, %61 ], [ %47, %41 ]\n  %68 = phi &lt;16 x float&gt; [ %131, %61 ], [ %48, %41 ]\n  %69 = phi &lt;16 x float&gt; [ %126, %61 ], [ %49, %41 ]\n  %70 = phi &lt;16 x float&gt; [ %121, %61 ], [ %50, %41 ]\n  %71 = phi &lt;16 x float&gt; [ %116, %61 ], [ %51, %41 ]\n  %72 = phi &lt;16 x float&gt; [ %111, %61 ], [ %52, %41 ]\n  %73 = phi &lt;16 x float&gt; [ %106, %61 ], [ %53, %41 ]\n  %74 = phi &lt;16 x float&gt; [ %101, %61 ], [ %54, %41 ]\n  %75 = phi &lt;16 x float&gt; [ %96, %61 ], [ %55, %41 ]\n  %76 = phi &lt;16 x float&gt; [ %91, %61 ], [ %56, %41 ]\n  %77 = phi &lt;16 x float&gt; [ %86, %61 ], [ %57, %41 ]\n  %78 = phi i64 [ %165, %61 ], [ 0, %41 ]\n  %79 = phi ptr [ %164, %61 ], [ %1, %41 ]\n  %80 = phi ptr [ %162, %61 ], [ %2, %41 ]\n  %81 = load &lt;16 x float&gt;, ptr %80, align 1, !tbaa !17, !alias.scope !372, !noalias !375\n  %82 = getelementptr inbounds float, ptr %80, i64 128\n  tail call void @llvm.prefetch.p0(ptr nonnull %82, i32 0, i32 3, i32 1), !noalias !375\n  %83 = load 
float, ptr %79, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %84 = insertelement &lt;16 x float&gt; poison, float %83, i64 0\n  %85 = shufflevector &lt;16 x float&gt; %84, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %86 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %85, &lt;16 x float&gt; %81, &lt;16 x float&gt; %77)\n  %87 = getelementptr inbounds float, ptr %79, i64 1\n  %88 = load float, ptr %87, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %89 = insertelement &lt;16 x float&gt; poison, float %88, i64 0\n  %90 = shufflevector &lt;16 x float&gt; %89, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %91 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %90, &lt;16 x float&gt; %81, &lt;16 x float&gt; %76)\n  %92 = getelementptr inbounds float, ptr %79, i64 2\n  %93 = load float, ptr %92, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %94 = insertelement &lt;16 x float&gt; poison, float %93, i64 0\n  %95 = shufflevector &lt;16 x float&gt; %94, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %96 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %95, &lt;16 x float&gt; %81, &lt;16 x float&gt; %75)\n  %97 = getelementptr inbounds float, ptr %79, i64 3\n  %98 = load float, ptr %97, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %99 = insertelement &lt;16 x float&gt; poison, float %98, i64 0\n  %100 = shufflevector &lt;16 x float&gt; %99, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %101 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %100, &lt;16 x float&gt; %81, &lt;16 x float&gt; %74)\n  %102 = getelementptr inbounds float, ptr %79, i64 4\n  %103 = load float, ptr %102, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %104 = insertelement &lt;16 x float&gt; poison, float %103, i64 0\n  %105 = shufflevector &lt;16 x float&gt; %104, &lt;16 x float&gt; poison, &lt;16 x i32&gt; 
zeroinitializer\n  %106 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %105, &lt;16 x float&gt; %81, &lt;16 x float&gt; %73)\n  %107 = getelementptr inbounds float, ptr %79, i64 5\n  %108 = load float, ptr %107, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %109 = insertelement &lt;16 x float&gt; poison, float %108, i64 0\n  %110 = shufflevector &lt;16 x float&gt; %109, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %111 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %110, &lt;16 x float&gt; %81, &lt;16 x float&gt; %72)\n  %112 = getelementptr inbounds float, ptr %79, i64 6\n  %113 = load float, ptr %112, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %114 = insertelement &lt;16 x float&gt; poison, float %113, i64 0\n  %115 = shufflevector &lt;16 x float&gt; %114, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %116 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %115, &lt;16 x float&gt; %81, &lt;16 x float&gt; %71)\n  %117 = getelementptr inbounds float, ptr %79, i64 7\n  %118 = load float, ptr %117, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %119 = insertelement &lt;16 x float&gt; poison, float %118, i64 0\n  %120 = shufflevector &lt;16 x float&gt; %119, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %121 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %120, &lt;16 x float&gt; %81, &lt;16 x float&gt; %70)\n  %122 = getelementptr inbounds float, ptr %79, i64 8\n  %123 = load float, ptr %122, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %124 = insertelement &lt;16 x float&gt; poison, float %123, i64 0\n  %125 = shufflevector &lt;16 x float&gt; %124, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %126 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %125, &lt;16 x float&gt; %81, &lt;16 x float&gt; %69)\n  %127 = getelementptr inbounds float, ptr %79, i64 9\n  %128 
= load float, ptr %127, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %129 = insertelement &lt;16 x float&gt; poison, float %128, i64 0\n  %130 = shufflevector &lt;16 x float&gt; %129, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %131 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %130, &lt;16 x float&gt; %81, &lt;16 x float&gt; %68)\n  %132 = getelementptr inbounds float, ptr %79, i64 10\n  %133 = load float, ptr %132, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %134 = insertelement &lt;16 x float&gt; poison, float %133, i64 0\n  %135 = shufflevector &lt;16 x float&gt; %134, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %136 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %135, &lt;16 x float&gt; %81, &lt;16 x float&gt; %67)\n  %137 = getelementptr inbounds float, ptr %79, i64 11\n  %138 = load float, ptr %137, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %139 = insertelement &lt;16 x float&gt; poison, float %138, i64 0\n  %140 = shufflevector &lt;16 x float&gt; %139, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %141 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %140, &lt;16 x float&gt; %81, &lt;16 x float&gt; %66)\n  %142 = getelementptr inbounds float, ptr %79, i64 12\n  %143 = load float, ptr %142, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %144 = insertelement &lt;16 x float&gt; poison, float %143, i64 0\n  %145 = shufflevector &lt;16 x float&gt; %144, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %146 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %145, &lt;16 x float&gt; %81, &lt;16 x float&gt; %65)\n  %147 = getelementptr inbounds float, ptr %79, i64 13\n  %148 = load float, ptr %147, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %149 = insertelement &lt;16 x float&gt; poison, float %148, i64 0\n  %150 = shufflevector &lt;16 x float&gt; %149, &lt;16 
x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %151 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %150, &lt;16 x float&gt; %81, &lt;16 x float&gt; %64)\n  %152 = getelementptr inbounds float, ptr %79, i64 14\n  %153 = load float, ptr %152, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %154 = insertelement &lt;16 x float&gt; poison, float %153, i64 0\n  %155 = shufflevector &lt;16 x float&gt; %154, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %156 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %155, &lt;16 x float&gt; %81, &lt;16 x float&gt; %63)\n  %157 = getelementptr inbounds float, ptr %79, i64 15\n  %158 = load float, ptr %157, align 4, !tbaa !331, !alias.scope !370, !noalias !374\n  %159 = insertelement &lt;16 x float&gt; poison, float %158, i64 0\n  %160 = shufflevector &lt;16 x float&gt; %159, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer\n  %161 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %160, &lt;16 x float&gt; %81, &lt;16 x float&gt; %62)\n  %162 = getelementptr inbounds float, ptr %80, i64 16\n  %163 = getelementptr inbounds float, ptr %79, i64 128\n  tail call void @llvm.prefetch.p0(ptr nonnull %163, i32 0, i32 3, i32 1), !noalias !374\n  %164 = getelementptr inbounds float, ptr %79, i64 16\n  %165 = add nuw nsw i64 %78, 1\n  %166 = icmp eq i64 %165, %59\n  br i1 %166, label %167, label %61, !llvm.loop !333\n\n167:                                              ; preds = %61, %41\n  %168 = phi &lt;16 x float&gt; [ %42, %41 ], [ %161, %61 ]\n  %169 = phi &lt;16 x float&gt; [ %43, %41 ], [ %156, %61 ]\n  %170 = phi &lt;16 x float&gt; [ %44, %41 ], [ %151, %61 ]\n  %171 = phi &lt;16 x float&gt; [ %45, %41 ], [ %146, %61 ]\n  %172 = phi &lt;16 x float&gt; [ %46, %41 ], [ %141, %61 ]\n  %173 = phi &lt;16 x float&gt; [ %47, %41 ], [ %136, %61 ]\n  %174 = phi &lt;16 x float&gt; [ %48, %41 ], [ %131, %61 ]\n  %175 = phi &lt;16 x float&gt; [ %49, 
%41 ], [ %126, %61 ]\n  %176 = phi &lt;16 x float&gt; [ %50, %41 ], [ %121, %61 ]\n  %177 = phi &lt;16 x float&gt; [ %51, %41 ], [ %116, %61 ]\n  %178 = phi &lt;16 x float&gt; [ %52, %41 ], [ %111, %61 ]\n  %179 = phi &lt;16 x float&gt; [ %53, %41 ], [ %106, %61 ]\n  %180 = phi &lt;16 x float&gt; [ %54, %41 ], [ %101, %61 ]\n  %181 = phi &lt;16 x float&gt; [ %55, %41 ], [ %96, %61 ]\n  %182 = phi &lt;16 x float&gt; [ %56, %41 ], [ %91, %61 ]\n  %183 = phi &lt;16 x float&gt; [ %57, %41 ], [ %86, %61 ]\n  store &lt;16 x float&gt; %183, ptr %0, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %184 = getelementptr inbounds float, ptr %0, i64 16\n  store &lt;16 x float&gt; %182, ptr %184, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %185 = getelementptr inbounds float, ptr %0, i64 32\n  store &lt;16 x float&gt; %181, ptr %185, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %186 = getelementptr inbounds float, ptr %0, i64 48\n  store &lt;16 x float&gt; %180, ptr %186, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %187 = getelementptr inbounds float, ptr %0, i64 64\n  store &lt;16 x float&gt; %179, ptr %187, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %188 = getelementptr inbounds float, ptr %0, i64 80\n  store &lt;16 x float&gt; %178, ptr %188, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %189 = getelementptr inbounds float, ptr %0, i64 96\n  store &lt;16 x float&gt; %177, ptr %189, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %190 = getelementptr inbounds float, ptr %0, i64 112\n  store &lt;16 x float&gt; %176, ptr %190, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %191 = getelementptr inbounds float, ptr %0, i64 128\n  store &lt;16 x float&gt; %175, ptr %191, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %192 = getelementptr inbounds float, ptr %0, i64 144\n  store &lt;16 x float&gt; %174, ptr %192, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %193 = getelementptr 
inbounds float, ptr %0, i64 160\n  store &lt;16 x float&gt; %173, ptr %193, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %194 = getelementptr inbounds float, ptr %0, i64 176\n  store &lt;16 x float&gt; %172, ptr %194, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %195 = getelementptr inbounds float, ptr %0, i64 192\n  store &lt;16 x float&gt; %171, ptr %195, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %196 = getelementptr inbounds float, ptr %0, i64 208\n  store &lt;16 x float&gt; %170, ptr %196, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %197 = getelementptr inbounds float, ptr %0, i64 224\n  store &lt;16 x float&gt; %169, ptr %197, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  %198 = getelementptr inbounds float, ptr %0, i64 240\n  store &lt;16 x float&gt; %168, ptr %198, align 1, !tbaa !17, !alias.scope !367, !noalias !377\n  ret void\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#intermediate-file-optimizedbc-disassembled-to-optimizedll","title":"Intermediate file: <code>...optimized.bc</code>, disassembled to <code>...optimized.ll</code>","text":"<pre><code>; Function Attrs: nofree norecurse nosync nounwind\ndefine internal noundef i32 @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32(ptr noalias nocapture nonnull readonly align 16 %0, ptr noalias nocapture nonnull readonly align 16 %1, ptr noalias nocapture nonnull readonly align 16 %2) #1 !dbg !90 {\n  %.elt7 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 4, !dbg !91\n  %.unpack8 = load i32, ptr %.elt7, align 4, !dbg !91\n  %.elt9 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 5, !dbg !91\n  %.unpack10 = load i32, ptr %.elt9, align 16, !dbg !91\n  %.elt17 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 9, !dbg !91\n  %.unpack18 = load ptr, ptr %.elt17, 
align 8, !dbg !91\n  %.elt19 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t.19, ptr %1, i64 0, i32 10, !dbg !91\n  %.unpack20 = load ptr, ptr %.elt19, align 16, !dbg !91\n  %4 = getelementptr i32, ptr %.unpack18, i64 4, !dbg !91\n  %5 = load i64, ptr %4, align 4, !dbg !91\n  %6 = getelementptr i32, ptr %.unpack18, i64 6, !dbg !91\n  %7 = load i32, ptr %6, align 4, !dbg !91\n  %8 = getelementptr i32, ptr %.unpack18, i64 7, !dbg !91\n  %9 = load i32, ptr %8, align 4, !dbg !91\n  %10 = getelementptr i32, ptr %.unpack18, i64 8, !dbg !91\n  %11 = load i64, ptr %10, align 4, !dbg !91\n  %12 = getelementptr i32, ptr %.unpack18, i64 10, !dbg !91\n  %13 = load i64, ptr %12, align 4, !dbg !91\n  %14 = shl i64 %13, 4, !dbg !91\n  %15 = getelementptr i32, ptr %.unpack18, i64 14, !dbg !91\n  %16 = load i64, ptr %15, align 4, !dbg !91\n  %17 = shl i64 %16, 8, !dbg !91\n  %18 = zext i32 %7 to i64, !dbg !91\n  %19 = zext i32 %9 to i64, !dbg !91\n  %20 = shl nuw i64 %19, 32, !dbg !91\n  %21 = or disjoint i64 %20, %18, !dbg !91\n  %22 = load ptr, ptr %.unpack20, align 8, !dbg !91\n  %23 = getelementptr ptr, ptr %.unpack20, i64 1, !dbg !91\n  %24 = load ptr, ptr %23, align 8, !dbg !91\n  %25 = load %iree_hal_executable_workgroup_state_v0_t.20, ptr %2, align 16, !dbg !91\n  %26 = extractvalue %iree_hal_executable_workgroup_state_v0_t.20 %25, 0, !dbg !91\n  %27 = zext i32 %26 to i64, !dbg !91\n  %28 = zext i32 %.unpack8 to i64, !dbg !91\n  %29 = extractvalue %iree_hal_executable_workgroup_state_v0_t.20 %25, 1, !dbg !91\n  %30 = zext i32 %29 to i64, !dbg !91\n  %31 = zext i32 %.unpack10 to i64, !dbg !91\n  %32 = icmp sgt i64 %5, %30, !dbg !91\n  br i1 %32, label %.preheader.lr.ph, label %._crit_edge58, !dbg !91\n\n.preheader.lr.ph:                                 ; preds = %3\n  %33 = getelementptr i32, ptr %.unpack18, i64 3, !dbg !91\n  %34 = load i32, ptr %33, align 4, !dbg !91\n  %35 = zext i32 %34 to i64, !dbg !91\n  %36 = shl nuw i64 %35, 32, !dbg !91\n  %37 = 
getelementptr i32, ptr %.unpack18, i64 2, !dbg !91\n  %38 = load i32, ptr %37, align 4, !dbg !91\n  %39 = zext i32 %38 to i64, !dbg !91\n  %40 = or disjoint i64 %36, %39, !dbg !91\n  %41 = getelementptr i32, ptr %.unpack18, i64 1, !dbg !91\n  %42 = load i32, ptr %41, align 4, !dbg !91\n  %43 = zext i32 %42 to i64, !dbg !91\n  %44 = shl nuw i64 %43, 32, !dbg !91\n  %45 = load i32, ptr %.unpack18, align 4, !dbg !91\n  %46 = zext i32 %45 to i64, !dbg !91\n  %47 = or disjoint i64 %44, %46, !dbg !91\n  %48 = icmp sgt i64 %11, %27\n  %.lobit = ashr i64 %44, 63\n  %49 = xor i64 %47, %.lobit\n  %50 = sdiv i64 %49, 4\n  %51 = xor i64 %50, %.lobit\n  %.lobit24 = ashr i64 %36, 63\n  %52 = xor i64 %40, %.lobit24\n  %53 = sdiv i64 %52, 4\n  %54 = xor i64 %53, %.lobit24\n  %55 = icmp eq i64 %21, 0\n  %56 = shl i64 %21, 9\n  %57 = icmp sgt i64 %21, 0\n  br label %.preheader, !dbg !91\n\n.preheader:                                       ; preds = %._crit_edge, %.preheader.lr.ph\n  %58 = phi i64 [ %30, %.preheader.lr.ph ], [ %228, %._crit_edge ]\n  br i1 %48, label %.lr.ph, label %._crit_edge, !dbg !91\n\n.lr.ph:                                           ; preds = %.preheader\n  %59 = mul i64 %17, %58\n  %60 = add i64 %59, %54\n  %61 = mul i64 %56, %58\n  %62 = ashr exact i64 %61, 3\n  %63 = getelementptr inbounds i8, ptr %22, i64 %62\n  %64 = shl i64 %60, 2\n  %invariant.gep = getelementptr i8, ptr %24, i64 %64, !dbg !91\n  br label %65, !dbg !91\n\n65:                                               ; preds = %iree_uk_mmt4d.exit, %.lr.ph\n  %66 = phi i64 [ %27, %.lr.ph ], [ %226, %iree_uk_mmt4d.exit ]\n  br i1 %55, label %iree_uk_mmt4d.exit, label %67, !dbg !91\n\n67:                                               ; preds = %65\n  %68 = mul i64 %14, %66, !dbg !91\n  %69 = add i64 %68, %51, !dbg !91\n  %70 = shl i64 %69, 5, !dbg !91\n  %71 = ashr exact i64 %70, 3, !dbg !91\n  %72 = getelementptr inbounds i8, ptr %22, i64 %71, !dbg !91\n  %73 = shl i64 %66, 10, !dbg !91\n  %gep = 
getelementptr i8, ptr %invariant.gep, i64 %73, !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %gep, i32 1, i32 1, i32 1), !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %63, i32 0, i32 3, i32 1), !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %72, i32 0, i32 3, i32 1), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !92), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !95), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !97), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !99), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !102), !dbg !91\n  tail call void @llvm.experimental.noalias.scope.decl(metadata !104), !dbg !91\n  tail call void @llvm.prefetch.p0(ptr %63, i32 0, i32 3, i32 1), !dbg !91, !noalias !106\n  tail call void @llvm.prefetch.p0(ptr %72, i32 0, i32 3, i32 1), !dbg !91, !noalias !107\n  %74 = load &lt;16 x float&gt;, ptr %gep, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %75 = getelementptr inbounds float, ptr %gep, i64 16, !dbg !91\n  %76 = load &lt;16 x float&gt;, ptr %75, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %77 = getelementptr inbounds float, ptr %gep, i64 32, !dbg !91\n  %78 = load &lt;16 x float&gt;, ptr %77, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %79 = getelementptr inbounds float, ptr %gep, i64 48, !dbg !91\n  %80 = load &lt;16 x float&gt;, ptr %79, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %81 = getelementptr inbounds float, ptr %gep, i64 64, !dbg !91\n  %82 = load &lt;16 x float&gt;, ptr %81, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %83 = getelementptr inbounds float, ptr %gep, i64 80, !dbg !91\n  %84 = load &lt;16 x float&gt;, ptr %83, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %85 = getelementptr inbounds float, ptr %gep, i64 96, !dbg !91\n  %86 = 
load &lt;16 x float&gt;, ptr %85, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %87 = getelementptr inbounds float, ptr %gep, i64 112, !dbg !91\n  %88 = load &lt;16 x float&gt;, ptr %87, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %89 = getelementptr inbounds float, ptr %gep, i64 128, !dbg !91\n  %90 = load &lt;16 x float&gt;, ptr %89, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %91 = getelementptr inbounds float, ptr %gep, i64 144, !dbg !91\n  %92 = load &lt;16 x float&gt;, ptr %91, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %93 = getelementptr inbounds float, ptr %gep, i64 160, !dbg !91\n  %94 = load &lt;16 x float&gt;, ptr %93, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %95 = getelementptr inbounds float, ptr %gep, i64 176, !dbg !91\n  %96 = load &lt;16 x float&gt;, ptr %95, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %97 = getelementptr inbounds float, ptr %gep, i64 192, !dbg !91\n  %98 = load &lt;16 x float&gt;, ptr %97, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %99 = getelementptr inbounds float, ptr %gep, i64 208, !dbg !91\n  %100 = load &lt;16 x float&gt;, ptr %99, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %101 = getelementptr inbounds float, ptr %gep, i64 224, !dbg !91\n  %102 = load &lt;16 x float&gt;, ptr %101, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  %103 = getelementptr inbounds float, ptr %gep, i64 240, !dbg !91\n  %104 = load &lt;16 x float&gt;, ptr %103, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  br i1 %57, label %.preheader.i, label %iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit, !dbg !91\n\n.preheader.i:                                     ; preds = %.preheader.i, %67\n  %105 = phi &lt;16 x float&gt; [ %204, %.preheader.i ], [ %104, %67 ], !dbg !91\n  %106 = phi &lt;16 x float&gt; [ %199, 
%.preheader.i ], [ %102, %67 ], !dbg !91\n  %107 = phi &lt;16 x float&gt; [ %194, %.preheader.i ], [ %100, %67 ], !dbg !91\n  %108 = phi &lt;16 x float&gt; [ %189, %.preheader.i ], [ %98, %67 ], !dbg !91\n  %109 = phi &lt;16 x float&gt; [ %184, %.preheader.i ], [ %96, %67 ], !dbg !91\n  %110 = phi &lt;16 x float&gt; [ %179, %.preheader.i ], [ %94, %67 ], !dbg !91\n  %111 = phi &lt;16 x float&gt; [ %174, %.preheader.i ], [ %92, %67 ], !dbg !91\n  %112 = phi &lt;16 x float&gt; [ %169, %.preheader.i ], [ %90, %67 ], !dbg !91\n  %113 = phi &lt;16 x float&gt; [ %164, %.preheader.i ], [ %88, %67 ], !dbg !91\n  %114 = phi &lt;16 x float&gt; [ %159, %.preheader.i ], [ %86, %67 ], !dbg !91\n  %115 = phi &lt;16 x float&gt; [ %154, %.preheader.i ], [ %84, %67 ], !dbg !91\n  %116 = phi &lt;16 x float&gt; [ %149, %.preheader.i ], [ %82, %67 ], !dbg !91\n  %117 = phi &lt;16 x float&gt; [ %144, %.preheader.i ], [ %80, %67 ], !dbg !91\n  %118 = phi &lt;16 x float&gt; [ %139, %.preheader.i ], [ %78, %67 ], !dbg !91\n  %119 = phi &lt;16 x float&gt; [ %134, %.preheader.i ], [ %76, %67 ], !dbg !91\n  %120 = phi &lt;16 x float&gt; [ %129, %.preheader.i ], [ %74, %67 ], !dbg !91\n  %121 = phi i64 [ %208, %.preheader.i ], [ 0, %67 ], !dbg !91\n  %122 = phi ptr [ %207, %.preheader.i ], [ %63, %67 ], !dbg !91\n  %123 = phi ptr [ %205, %.preheader.i ], [ %72, %67 ], !dbg !91\n  %124 = load &lt;16 x float&gt;, ptr %123, align 1, !dbg !91, !tbaa !108, !alias.scope !113, !noalias !107\n  %125 = getelementptr inbounds float, ptr %123, i64 128, !dbg !91\n  tail call void @llvm.prefetch.p0(ptr nonnull %125, i32 0, i32 3, i32 1), !dbg !91, !noalias !107\n  %126 = load float, ptr %122, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %127 = insertelement &lt;16 x float&gt; poison, float %126, i64 0, !dbg !91\n  %128 = shufflevector &lt;16 x float&gt; %127, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %129 = tail call &lt;16 x float&gt; 
@llvm.fma.v16f32(&lt;16 x float&gt; %128, &lt;16 x float&gt; %124, &lt;16 x float&gt; %120), !dbg !91\n  %130 = getelementptr inbounds float, ptr %122, i64 1, !dbg !91\n  %131 = load float, ptr %130, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %132 = insertelement &lt;16 x float&gt; poison, float %131, i64 0, !dbg !91\n  %133 = shufflevector &lt;16 x float&gt; %132, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %134 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %133, &lt;16 x float&gt; %124, &lt;16 x float&gt; %119), !dbg !91\n  %135 = getelementptr inbounds float, ptr %122, i64 2, !dbg !91\n  %136 = load float, ptr %135, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %137 = insertelement &lt;16 x float&gt; poison, float %136, i64 0, !dbg !91\n  %138 = shufflevector &lt;16 x float&gt; %137, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %139 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %138, &lt;16 x float&gt; %124, &lt;16 x float&gt; %118), !dbg !91\n  %140 = getelementptr inbounds float, ptr %122, i64 3, !dbg !91\n  %141 = load float, ptr %140, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %142 = insertelement &lt;16 x float&gt; poison, float %141, i64 0, !dbg !91\n  %143 = shufflevector &lt;16 x float&gt; %142, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %144 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %143, &lt;16 x float&gt; %124, &lt;16 x float&gt; %117), !dbg !91\n  %145 = getelementptr inbounds float, ptr %122, i64 4, !dbg !91\n  %146 = load float, ptr %145, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %147 = insertelement &lt;16 x float&gt; poison, float %146, i64 0, !dbg !91\n  %148 = shufflevector &lt;16 x float&gt; %147, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %149 = tail call &lt;16 x 
float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %148, &lt;16 x float&gt; %124, &lt;16 x float&gt; %116), !dbg !91\n  %150 = getelementptr inbounds float, ptr %122, i64 5, !dbg !91\n  %151 = load float, ptr %150, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %152 = insertelement &lt;16 x float&gt; poison, float %151, i64 0, !dbg !91\n  %153 = shufflevector &lt;16 x float&gt; %152, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %154 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %153, &lt;16 x float&gt; %124, &lt;16 x float&gt; %115), !dbg !91\n  %155 = getelementptr inbounds float, ptr %122, i64 6, !dbg !91\n  %156 = load float, ptr %155, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %157 = insertelement &lt;16 x float&gt; poison, float %156, i64 0, !dbg !91\n  %158 = shufflevector &lt;16 x float&gt; %157, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %159 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %158, &lt;16 x float&gt; %124, &lt;16 x float&gt; %114), !dbg !91\n  %160 = getelementptr inbounds float, ptr %122, i64 7, !dbg !91\n  %161 = load float, ptr %160, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %162 = insertelement &lt;16 x float&gt; poison, float %161, i64 0, !dbg !91\n  %163 = shufflevector &lt;16 x float&gt; %162, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %164 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %163, &lt;16 x float&gt; %124, &lt;16 x float&gt; %113), !dbg !91\n  %165 = getelementptr inbounds float, ptr %122, i64 8, !dbg !91\n  %166 = load float, ptr %165, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %167 = insertelement &lt;16 x float&gt; poison, float %166, i64 0, !dbg !91\n  %168 = shufflevector &lt;16 x float&gt; %167, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %169 = tail call 
&lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %168, &lt;16 x float&gt; %124, &lt;16 x float&gt; %112), !dbg !91\n  %170 = getelementptr inbounds float, ptr %122, i64 9, !dbg !91\n  %171 = load float, ptr %170, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %172 = insertelement &lt;16 x float&gt; poison, float %171, i64 0, !dbg !91\n  %173 = shufflevector &lt;16 x float&gt; %172, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %174 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %173, &lt;16 x float&gt; %124, &lt;16 x float&gt; %111), !dbg !91\n  %175 = getelementptr inbounds float, ptr %122, i64 10, !dbg !91\n  %176 = load float, ptr %175, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %177 = insertelement &lt;16 x float&gt; poison, float %176, i64 0, !dbg !91\n  %178 = shufflevector &lt;16 x float&gt; %177, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %179 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %178, &lt;16 x float&gt; %124, &lt;16 x float&gt; %110), !dbg !91\n  %180 = getelementptr inbounds float, ptr %122, i64 11, !dbg !91\n  %181 = load float, ptr %180, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %182 = insertelement &lt;16 x float&gt; poison, float %181, i64 0, !dbg !91\n  %183 = shufflevector &lt;16 x float&gt; %182, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %184 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %183, &lt;16 x float&gt; %124, &lt;16 x float&gt; %109), !dbg !91\n  %185 = getelementptr inbounds float, ptr %122, i64 12, !dbg !91\n  %186 = load float, ptr %185, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %187 = insertelement &lt;16 x float&gt; poison, float %186, i64 0, !dbg !91\n  %188 = shufflevector &lt;16 x float&gt; %187, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %189 = 
tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %188, &lt;16 x float&gt; %124, &lt;16 x float&gt; %108), !dbg !91\n  %190 = getelementptr inbounds float, ptr %122, i64 13, !dbg !91\n  %191 = load float, ptr %190, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %192 = insertelement &lt;16 x float&gt; poison, float %191, i64 0, !dbg !91\n  %193 = shufflevector &lt;16 x float&gt; %192, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %194 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %193, &lt;16 x float&gt; %124, &lt;16 x float&gt; %107), !dbg !91\n  %195 = getelementptr inbounds float, ptr %122, i64 14, !dbg !91\n  %196 = load float, ptr %195, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %197 = insertelement &lt;16 x float&gt; poison, float %196, i64 0, !dbg !91\n  %198 = shufflevector &lt;16 x float&gt; %197, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %199 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %198, &lt;16 x float&gt; %124, &lt;16 x float&gt; %106), !dbg !91\n  %200 = getelementptr inbounds float, ptr %122, i64 15, !dbg !91\n  %201 = load float, ptr %200, align 4, !dbg !91, !tbaa !114, !alias.scope !116, !noalias !106\n  %202 = insertelement &lt;16 x float&gt; poison, float %201, i64 0, !dbg !91\n  %203 = shufflevector &lt;16 x float&gt; %202, &lt;16 x float&gt; poison, &lt;16 x i32&gt; zeroinitializer, !dbg !91\n  %204 = tail call &lt;16 x float&gt; @llvm.fma.v16f32(&lt;16 x float&gt; %203, &lt;16 x float&gt; %124, &lt;16 x float&gt; %105), !dbg !91\n  %205 = getelementptr inbounds float, ptr %123, i64 16, !dbg !91\n  %206 = getelementptr inbounds float, ptr %122, i64 128, !dbg !91\n  tail call void @llvm.prefetch.p0(ptr nonnull %206, i32 0, i32 3, i32 1), !dbg !91, !noalias !106\n  %207 = getelementptr inbounds float, ptr %122, i64 16, !dbg !91\n  %208 = add nuw nsw i64 %121, 1, !dbg !91\n  %209 = icmp 
eq i64 %208, %21, !dbg !91\n  br i1 %209, label %iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit, label %.preheader.i, !dbg !91, !llvm.loop !117\n\niree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit: ; preds = %.preheader.i, %67\n  %210 = phi &lt;16 x float&gt; [ %104, %67 ], [ %204, %.preheader.i ], !dbg !91\n  %211 = phi &lt;16 x float&gt; [ %102, %67 ], [ %199, %.preheader.i ], !dbg !91\n  %212 = phi &lt;16 x float&gt; [ %100, %67 ], [ %194, %.preheader.i ], !dbg !91\n  %213 = phi &lt;16 x float&gt; [ %98, %67 ], [ %189, %.preheader.i ], !dbg !91\n  %214 = phi &lt;16 x float&gt; [ %96, %67 ], [ %184, %.preheader.i ], !dbg !91\n  %215 = phi &lt;16 x float&gt; [ %94, %67 ], [ %179, %.preheader.i ], !dbg !91\n  %216 = phi &lt;16 x float&gt; [ %92, %67 ], [ %174, %.preheader.i ], !dbg !91\n  %217 = phi &lt;16 x float&gt; [ %90, %67 ], [ %169, %.preheader.i ], !dbg !91\n  %218 = phi &lt;16 x float&gt; [ %88, %67 ], [ %164, %.preheader.i ], !dbg !91\n  %219 = phi &lt;16 x float&gt; [ %86, %67 ], [ %159, %.preheader.i ], !dbg !91\n  %220 = phi &lt;16 x float&gt; [ %84, %67 ], [ %154, %.preheader.i ], !dbg !91\n  %221 = phi &lt;16 x float&gt; [ %82, %67 ], [ %149, %.preheader.i ], !dbg !91\n  %222 = phi &lt;16 x float&gt; [ %80, %67 ], [ %144, %.preheader.i ], !dbg !91\n  %223 = phi &lt;16 x float&gt; [ %78, %67 ], [ %139, %.preheader.i ], !dbg !91\n  %224 = phi &lt;16 x float&gt; [ %76, %67 ], [ %134, %.preheader.i ], !dbg !91\n  %225 = phi &lt;16 x float&gt; [ %74, %67 ], [ %129, %.preheader.i ], !dbg !91\n  store &lt;16 x float&gt; %225, ptr %gep, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %224, ptr %75, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %223, ptr %77, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %222, ptr %79, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x 
float&gt; %221, ptr %81, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %220, ptr %83, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %219, ptr %85, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %218, ptr %87, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %217, ptr %89, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %216, ptr %91, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %215, ptr %93, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %214, ptr %95, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %213, ptr %97, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %212, ptr %99, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %211, ptr %101, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  store &lt;16 x float&gt; %210, ptr %103, align 1, !dbg !91, !tbaa !108, !alias.scope !111, !noalias !112\n  br label %iree_uk_mmt4d.exit, !dbg !91\n\niree_uk_mmt4d.exit:                               ; preds = %iree_uk_mmt4d_tile_f32f32f32_16x16x1_x86_64_avx512_base.exit, %65\n  %226 = add i64 %66, %28, !dbg !91\n  %227 = icmp slt i64 %226, %11, !dbg !91\n  br i1 %227, label %65, label %._crit_edge, !dbg !91\n\n._crit_edge:                                      ; preds = %iree_uk_mmt4d.exit, %.preheader\n  %228 = add i64 %58, %31, !dbg !91\n  %229 = icmp slt i64 %228, %5, !dbg !91\n  br i1 %229, label %.preheader, label %._crit_edge58, !dbg !91\n\n._crit_edge58:                                    ; preds = %._crit_edge, %3\n  ret i32 0, !dbg 
!91\n}\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/#x86-assembly","title":"x86 assembly","text":"<pre><code>  .section  .text.matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32,\"ax\",@progbits\n  .p2align  4, 0x90\n  .type  matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32,@function\nmatmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32:\n.Lfunc_begin3:\n  .loc  1 1 0 is_stmt 1\n  .cfi_startproc\n  push  rbp\n  .cfi_def_cfa_offset 16\n  .cfi_offset rbp, -16\n  mov  rbp, rsp\n  .cfi_def_cfa_register rbp\n.Ltmp6:\n  push  r15\n  push  r14\n  push  r13\n  push  r12\n  push  rbx\n  .cfi_offset rbx, -56\n  .cfi_offset r12, -48\n  .cfi_offset r13, -40\n  .cfi_offset r14, -32\n  .cfi_offset r15, -24\n  .loc  1 1 1 prologue_end\n  mov  rcx, qword ptr [rsi + 24]\n  mov  edi, dword ptr [rdx + 4]\n  mov  rax, qword ptr [rcx + 16]\n  mov  qword ptr [rbp - 48], rdi\n  mov  qword ptr [rbp - 112], rax\n  cmp  rax, rdi\n  jle  .LBB3_11\n  mov  eax, dword ptr [rsi + 16]\n  mov  edi, dword ptr [rsi + 12]\n  mov  r12, qword ptr [rsi + 32]\n  mov  rsi, qword ptr [rcx + 40]\n  mov  r9, qword ptr [rcx + 56]\n  mov  ebx, dword ptr [rcx + 4]\n  mov  r10d, dword ptr [rcx]\n  mov  r11, qword ptr [rcx + 24]\n  mov  r14, qword ptr [rcx + 32]\n  mov  r8, rsi\n  shl  r8, 4\n  mov  qword ptr [rbp - 104], rax\n  shl  r9, 8\n  mov  rax, qword ptr [r12 + 8]\n  shl  rbx, 32\n  mov  qword ptr [rbp - 128], r8\n  mov  r8d, dword ptr [rcx + 12]\n  mov  qword ptr [rbp - 96], r9\n  mov  r9d, dword ptr [rcx + 8]\n  or  r10, rbx\n  sar  rbx, 63\n  xor  r10, rbx\n  lea  r15, [r10 + 3]\n  mov  qword ptr [rbp - 80], rax\n  mov  eax, dword ptr [rdx]\n  shl  r8, 32\n  or  r9, r8\n  test  r10, r10\n  cmovns  r15, r10\n  sar  r8, 63\n  sar  r15, 2\n  xor  r9, r8\n  xor  r15, rbx\n  lea  rcx, [r9 + 3]\n  test  r9, r9\n  mov  qword ptr [rbp - 56], rax\n  cmovns  rcx, r9\n  imul  rax, rsi\n  mov  r9, qword ptr [r12]\n  imul  rsi, rdi\n  mov  qword 
ptr [rbp - 120], r15\n  sar  rcx, 2\n  xor  rcx, r8\n  shl  rax, 6\n  mov  qword ptr [rbp - 88], rcx\n  mov  rcx, r11\n  shl  rcx, 9\n  shl  rsi, 6\n  lea  rax, [rax + 4*r15]\n  mov  qword ptr [rbp - 72], rcx\n  mov  qword ptr [rbp - 64], rax\n  jmp  .LBB3_2\n  .p2align  4, 0x90\n.LBB3_10:\n  .loc  1 0 1 is_stmt 0\n  mov  rax, qword ptr [rbp - 48]\n  .loc  1 1 1\n  add  rax, qword ptr [rbp - 104]\n  mov  qword ptr [rbp - 48], rax\n  cmp  rax, qword ptr [rbp - 112]\n  jge  .LBB3_11\n.LBB3_2:\n  .loc  1 0 1\n  cmp  r14, qword ptr [rbp - 56]\n  .loc  1 1 1\n  jle  .LBB3_10\n  .loc  1 0 1\n  mov  rax, qword ptr [rbp - 96]\n  mov  rcx, qword ptr [rbp - 48]\n  mov  r10, qword ptr [rbp - 72]\n  mov  rdx, qword ptr [rbp - 80]\n  mov  r8, qword ptr [rbp - 64]\n  imul  rax, rcx\n  add  rax, qword ptr [rbp - 88]\n  imul  r10, rcx\n  sar  r10, 3\n  lea  r13, [r9 + r10]\n  .loc  1 1 1\n  lea  r15, [rdx + 4*rax]\n  mov  rax, qword ptr [rbp - 56]\n  jmp  .LBB3_4\n  .p2align  4, 0x90\n.LBB3_8:\n  add  rdx, r15\n  vmovups  zmmword ptr [rdx], zmm15\n  vmovups  zmmword ptr [rdx + 64], zmm14\n  vmovups  zmmword ptr [rdx + 128], zmm13\n  vmovups  zmmword ptr [rdx + 192], zmm12\n  vmovups  zmmword ptr [rdx + 256], zmm11\n  vmovups  zmmword ptr [rdx + 320], zmm10\n  vmovups  zmmword ptr [rdx + 384], zmm9\n  vmovups  zmmword ptr [rdx + 448], zmm8\n  vmovups  zmmword ptr [rdx + 512], zmm7\n  vmovups  zmmword ptr [rdx + 576], zmm6\n  vmovups  zmmword ptr [rdx + 640], zmm5\n  vmovups  zmmword ptr [rdx + 704], zmm4\n  vmovups  zmmword ptr [rdx + 768], zmm3\n  vmovups  zmmword ptr [rdx + 832], zmm2\n  vmovups  zmmword ptr [rdx + 896], zmm1\n  vmovups  zmmword ptr [rdx + 960], zmm0\n.LBB3_9:\n  add  rax, rdi\n  add  r8, rsi\n  cmp  rax, r14\n  jge  .LBB3_10\n.LBB3_4:\n  .loc  1 0 1\n  test  r11, r11\n  .loc  1 1 1\n  je  .LBB3_9\n  .loc  1 0 1\n  mov  rcx, qword ptr [rbp - 128]\n  .loc  1 1 1\n  mov  rdx, rax\n  shl  rdx, 10\n  prefetchw  byte ptr [r15 + rdx]\n  prefetcht0  byte ptr [r13]\n  
imul  rcx, rax\n  add  rcx, qword ptr [rbp - 120]\n  shl  rcx, 5\n  sar  rcx, 3\n  prefetcht0  byte ptr [r9 + rcx]\n  prefetcht0  byte ptr [r13]\n  prefetcht0  byte ptr [r9 + rcx]\n  vmovups  zmm15, zmmword ptr [r15 + rdx]\n  vmovups  zmm14, zmmword ptr [r15 + rdx + 64]\n  vmovups  zmm13, zmmword ptr [r15 + rdx + 128]\n  vmovups  zmm12, zmmword ptr [r15 + rdx + 192]\n  vmovups  zmm11, zmmword ptr [r15 + rdx + 256]\n  vmovups  zmm10, zmmword ptr [r15 + rdx + 320]\n  vmovups  zmm9, zmmword ptr [r15 + rdx + 384]\n  vmovups  zmm8, zmmword ptr [r15 + rdx + 448]\n  vmovups  zmm7, zmmword ptr [r15 + rdx + 512]\n  vmovups  zmm6, zmmword ptr [r15 + rdx + 576]\n  vmovups  zmm5, zmmword ptr [r15 + rdx + 640]\n  vmovups  zmm4, zmmword ptr [r15 + rdx + 704]\n  vmovups  zmm3, zmmword ptr [r15 + rdx + 768]\n  vmovups  zmm2, zmmword ptr [r15 + rdx + 832]\n  vmovups  zmm1, zmmword ptr [r15 + rdx + 896]\n  vmovups  zmm0, zmmword ptr [r15 + rdx + 960]\n  test  r11, r11\n  jle  .LBB3_8\n  .loc  1 0 1\n  lea  rcx, [8*r8]\n  mov  r12, r9\n  mov  rbx, r11\n  sar  rcx, 3\n  add  rcx, 512\n  .p2align  4, 0x90\n.LBB3_7:\n  .loc  1 1 1\n  vmovups  zmm16, zmmword ptr [r12 + rcx - 512]\n  prefetcht0  byte ptr [r12 + rcx]\n  vfmadd231ps  zmm15, zmm16, dword ptr [r12 + r10]{1to16}\n  vfmadd231ps  zmm14, zmm16, dword ptr [r12 + r10 + 4]{1to16}\n  vfmadd231ps  zmm13, zmm16, dword ptr [r12 + r10 + 8]{1to16}\n  vfmadd231ps  zmm12, zmm16, dword ptr [r12 + r10 + 12]{1to16}\n  vfmadd231ps  zmm11, zmm16, dword ptr [r12 + r10 + 16]{1to16}\n  vfmadd231ps  zmm10, zmm16, dword ptr [r12 + r10 + 20]{1to16}\n  vfmadd231ps  zmm9, zmm16, dword ptr [r12 + r10 + 24]{1to16}\n  vfmadd231ps  zmm8, zmm16, dword ptr [r12 + r10 + 28]{1to16}\n  vfmadd231ps  zmm7, zmm16, dword ptr [r12 + r10 + 32]{1to16}\n  vfmadd231ps  zmm6, zmm16, dword ptr [r12 + r10 + 36]{1to16}\n  vfmadd231ps  zmm5, zmm16, dword ptr [r12 + r10 + 40]{1to16}\n  vfmadd231ps  zmm4, zmm16, dword ptr [r12 + r10 + 44]{1to16}\n  vfmadd231ps  zmm3, zmm16, 
dword ptr [r12 + r10 + 48]{1to16}\n  vfmadd231ps  zmm2, zmm16, dword ptr [r12 + r10 + 52]{1to16}\n  vfmadd231ps  zmm1, zmm16, dword ptr [r12 + r10 + 56]{1to16}\n  vfmadd231ps  zmm0, zmm16, dword ptr [r12 + r10 + 60]{1to16}\n  prefetcht0  byte ptr [r12 + r10 + 512]\n  add  r12, 64\n  dec  rbx\n  jne  .LBB3_7\n  jmp  .LBB3_8\n.LBB3_11:\n  xor  eax, eax\n  .loc  1 1 1 epilogue_begin\n  pop  rbx\n  pop  r12\n  pop  r13\n  pop  r14\n  pop  r15\n  pop  rbp\n  .cfi_def_cfa rsp, 8\n  vzeroupper\n  ret\n.Ltmp7:\n.Lfunc_end3:\n  .size  matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32, .Lfunc_end3-matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32\n  .cfi_endproc\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/","title":"Matrix Multiplication with MMT4D","text":"","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#introduction","title":"Introduction","text":"<p>Matrix multiplication (matmul) is an important operation in ML workloads that poses specific challenges to code generation. For example, matmul makes repeated accesses to the same data, which makes locality of reference a top concern.</p> <p>Moreover, modern CPUs instruction set architectures (ISAs) offer specialized SIMD instructions that the matmul implementation needs to use to achieve optimal performance, and these instructions expect data to be in a particular layout.</p> <p>This article is about an in-development MLIR operation, <code>linalg.mmt4d</code>, offering a compilation path for <code>linalg.matmul</code> that is designed from the ground up for these efficiency considerations.</p> <p>We are still in the early implementation phase of this <code>linalg.mmt4d</code> plan, but we feel confident that we know where we are going because what we are really doing here is importing into the compiler what we have learned working on optimized matrix multiplication libraries, particularly Ruy. 
We know what loop schedule and kernel we want the compiler to generate \u2014 essentially the same as we wrote in Ruy, give or take additional optimizations such as fusions and constant folding that become possible now that we are doing this within a compiler. This allows us to focus on how we get the compiler to generate that schedule and kernel with purely algebraic transformations that compose and enable further compiler optimizations.</p> <p>At the basis of this work is the extensible op system of the Linalg dialect in the MLIR compiler toolkit. In this case, a general purpose, mixed precision mmt4d op is defined via a high level description directly in the compiler and is then available to both users of the compiler (as a <code>linalg.mmt4d</code> op) or for direct emission via Python based IR construction (i.e. for direct integration into high level frameworks without rebuilding the compiler). The ability to define such new special forms cheaply, and without any systemic framework level cost, is part of the extensibility and composition story that we expect will become increasingly important in development and deployment scenarios in the future, and in this case, it let us spring board off of high quality code generation which was already well integrated and composed well with other features of the compiler.</p>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#existing-matrix-multplication-code-generation","title":"Existing Matrix Multplication Code Generation","text":"<p>Let us start by discussing IREE\u2019s existing matmul code generation and highlight the issues that <code>mmt4d</code> aims to overcome.</p> <p>The existing approach operates in-place on the source matrices. When we discuss \"tiling\" in this paragraph, we refer exclusively to the traversal \u2014 how these source matrices are traversed by the matmul loop. 
There is no \"tiled layout\" here, which will be the key difference with <code>mmt4d</code> below.</p> <p>The destination matrix is tiled into workgroups (CPU threads) tiles, then each workgroup tile is tiled to fit some level of CPU cache, and finally each tile is further tiled to fit target architecture registers (e.g. 8x8).</p> <p>That multi-level tiling means that the code works like the following loop nest:</p> <pre><code>def tiled_matmul(A, B, C, tile_m, tile_n, tile_k, tile_m_v, tile_n_v, tile_k_v):\n m = A.shape[0]\n k = A.shape[1]\n n = B.shape[1]\n for m1 in range(0, m, tile_m):\n   for n1 in range(0, n, tile_n):\n     for k1 in range(0, k, tile_k):\n       # First level of tiling views...\n       lhs_tile = A[m1:m1+tile_m, k1:k1+tile_k]\n       rhs_tile = B[k1:k1+tile_k, n1:n1+tile_n]\n       dst_tile = C[m1:m1+tile_m, n1:n1+tile_n]\n       for mv in range(0, tile_m, tile_m_v):\n         for nv in range(0, tile_n, tile_n_v):\n           for kv in range(0, tile_k, tile_k_v):\n             # Register tiling views...\n             lhs_tile_v = lhs_tile[mv:mv+tile_m_v, kv:kv+tile_k_v]\n             rhs_tile_v = rhs_tile[kv:kv+tile_k_v, nv:nv+tile_n_v]\n             # kernel.\n             dst_tile[mv:mv+tile_m_v, nv:nv+tile_n_v] += np.matmul(lhs_tile_v, rhs_tile_v)\n return C\n</code></pre> <p>The two main problems with this approach are:</p> <ul> <li> <p>Overhead to meet SIMD ISA layout requirements: In practice, the kernel     needs to use specific SIMD     instructions to perform the arithmetic. They expect small tiles of the     matrices to be loaded in registers, in a specific layout. If the matrix data     wasn't already stored in memory in such a tiled layout, then the kernel has     to perform such a data rearrangement on the fly, incurring substantial     overhead. 
For NxN matrix multiplication, the kernel performs     O(N<sup>3</sup>) work on O(N<sup>2</sup>) data, so doing that rearrangement     there means O(N<sup>3</sup>) overhead where O(N<sup>2</sup>) should have     sufficed, as this could have been done as a pre-processing step on     O(N<sup>2</sup>) data.</p> </li> <li> <p>Inefficient memory traversal: For efficiency reasons, we always need     <code>tile_m_v&gt;1</code> and <code>tile_n_v&gt;1</code>. That is because the higher these values, the     fewer memory-load instructions are needed overall; and this is also dictated     by the SIMD instructions that we want to use. But that means that the kernel     is accessing simultaneously multiple rows or columns of the left-hand and     right-hand side matrices. And in this existing approach, they are stored in     linear layout, not in a tiled layout, so these accesses are not contiguous     in memory. This is detrimental to memory access performance, meaning the     CPU caches, in multiple ways. One     is that these multiple non-contiguous accesses may alias each other in the     L1 cache because of low     associativity.</p> </li> </ul>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#matrix-multiplication-operation-with-4d-tiled-operands","title":"Matrix Multiplication Operation With 4D Tiled Operands","text":"<p>For the reasons above, an efficient matmul implementation must reorder data into a tiled layout matching the target SIMD ISA and making the memory access patterns as contiguous as possible.</p> <p>IREE/MLIR defaults to bufferizing all tensors into a \"row-major\" order, meaning that the last-enumerated dimension is the one that is contiguous in memory. As we prefer not to write custom bufferization code, we can't specify an alternative layout for a tensor. Fortunately, it is possible to represent a 2D tiled layout as a 4D layout. 
For example, <code>tensor&lt;2x2x2x2xf32&gt;</code> can represent a 4x4 matrix made of 2x2 tiles, each of which is 2x2. The row-major layout on <code>tensor&lt;2x2x2x2xf32&gt;</code> makes each 2x2 tile contiguous and row-major, and arranges the 2x2 tiles themselves into a row-major 2x2 layout in the overall 4x4 matrix.</p> <p>Such a row-major-tiled layout is exactly what we need for the left-hand side of a matrix multiplication, because matrix multiplication traverses the left-hand side matrix row by row. But for the right-hand side matrix, we want a column-major-tiled layout. To solve this problem, we decide to implement not matrix multiplication, but matrix-multiplication-by-transposed-right-hand-side which is where the <code>t</code> in the <code>linalg.mmt4d</code> came from. Now such an op is happy with both the left and right-hand sides being row-major-tiled.</p> <p>The following example illustrates that. In these diagrams, each matrix element is rendered as its memory offset.</p> <p></p> <p>To compute the 2x2 block in the destination matrix, we will have to load two yellow blocks from LHS, RHS matrices respectively, compute their matmul results (i.e. call the kernel), then the two blue blocks, and so on. As we can see, each tile loads data that is not contiguous. It would be better if we rearranged the elements in the following layout:</p> <p></p> <p>Now tiles are stored contiguously in memory and the kernel can simply load them from memory into the registers that will be directly consumed by the SIMD instructions performing the multiplications. Moreover, the kernel is now loading from just two contiguous data streams, a simple memory access pattern which is sure to be efficient (regarding caches, etc) on any reasonable target hardware.</p> <p>We introduce a <code>linalg.mmt4d</code> operation that performs such a matrix multiplication on matrices in a tiled layout represented as 4D tensors. 
That leaves the question of how to represent, within the linalg dialect, the conversions between ordinary matrices represented as 2D tensors, and these tiled matrices represented as 4D tensors. Moreover, these conversions should be tileable and decompose well. Thankfully, the transformation from 2D to 4D can be written as a reshape followed by a transpose as in the following diagram:</p> <p></p> <p>So we can think of the outermost two dimensions of the 4D representations as the tile position in the overall matrix, and the innermost two as the element position within one tile. Hopefully the following Python pseudocode makes it more concrete:</p> <pre><code>def pack_2d_4d(operand, parallel_size, reduction_size):\n i1 = operand.shape[0] // parallel_size # M1\n i2 = parallel_size    # M0\n j1 = operand.shape[1] // reduction_size # K1\n j2 = reduction_size   # K0\n operand_4d = np.reshape(operand, [i1, i2, j1, j2])\n return np.transpose(operand_4d, [0, 2, 1, 3]) # [M1, K1, M0, K0]\n</code></pre> <p>Now the mmt4d operation will follow the same structure as the multi-level tiling. For simplicity, we consider here the case where no L1 tiling is required, only the first level of distribution to workgroups:</p> <pre><code>def mmt4d(A, B, C, M0, N0, K0):\n M = A.shape[0]\n N = B.shape[1]\n Bt = np.transpose(B, [1, 0])\n A4d = pack_2d_4d(A, M0, K0)\n Bt4d = pack_2d_4d(Bt, N0, K0)\n M1 = A4d.shape[0]\n N1 = Bt4d.shape[0]\n K1 = A4d.shape[1]\n for m1 in range(0, M1):\n   for n1 in range(0, N1):\n     for k1 in range(0, K1):\n       # Tile views that are contiguous in memory.\n       lhs_tile = np.reshape(A4d[m1, k1, :, :], [M0, K0])\n       rhs_tile = np.reshape(Bt4d[n1, k1, :, :], [N0, K0])\n       # Inner kernel.\n       C[m1, n1, :, :] += np.matmul(lhs_tile, np.transpose(rhs_tile, [1, 0]))\n # 4d -&gt; 2D\n C2d = unpack_4d_2d(C)\n return C2d\n</code></pre> <p>The resulting 4D tiled matrix still needs to be rearranged back to the original layout as a 2D tensor:</p> <pre><code>def 
unpack_4d_2d(operand):\n i1 = operand.shape[0] # M1\n j1 = operand.shape[1] # N1\n i2 = operand.shape[2] # M0\n j2 = operand.shape[3] # N0\n operand_transposed = operand.transpose([0, 2, 1, 3]) # [M1, M0, N1, N0]\n return operand_transposed.reshape([i1 * i2, j1 * j2]) # [M, N]\n</code></pre>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#performance-results","title":"Performance Results","text":"<p>We benchmarked various float32 matmul problems of different sizes and the result showed that mmt4d is faster than the existing matmul implementation for bigger matrices as we can see in the following chart:</p> <p></p> <p>The SIMD instruction being used here is the simplest kind, a <code>vector*scalar</code> multiplication, and the storage orders of the matrices allow the existing implementation to directly load the vectors from the source matrices without any rearrangement overhead. So this case is particularly friendly to the existing code, which is why the mmt4d code is only faster for bigger matrices. 
To understand why mmt4d is faster in that case, we collected statistics of L1 cache misses:</p> <p></p> <p>This shows that in this case, the better cache-friendliness of mmt4d, thanks to its simple contiguous memory access pattern, accounts for its higher performance.</p> <p>As we proceed with increasingly sophisticated SIMD targets, starting with the dot-product instructions found in current mobile devices for the int8 case and going to become generalized to all data types all the way to float32 over the next few years with upcoming ARM SIMD instructions, the advantage of mmt4d will widen for all sizes, not just the larger ones.</p> <p>Part of why we feel confident about the eventual performance that our approach will achieve is that, as mentioned in the introduction, we are rebuilding within the compiler an existing library's schedule and kernel, and we have benchmark results about it.</p>","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#conclusion","title":"Conclusion","text":"<p>We introduced a 4d tiled representation for 2d matrix-matrix multiplication with a decomposable algebraic transformation that requires only reshape and transpose of input operands. We discussed and empirically showed how that solves major drawbacks in row-major linear matmul by providing a flexible way to match different ISA layouts along with better cache locality, achieving near peak performance.</p> <p>As was mentioned in the introduction, this work is under active development and the next immediate steps are to prove the rest of the hypothesis by:</p> <ul> <li> <p>Handling dynamic sizes and padding to the next multiple of the target tile   size.</p> </li> <li> <p>Implementing the integer case (<code>int32 += int8 * int8</code>).</p> </li> <li> <p>Implementing the dispatch to different SIMD ISA variants at runtime.</p> </li> <li> <p>Implementing cache-friendly traversal for larger matmuls and multi-threading   by interfacing with IREE's runtime 
dispatch.</p> </li> <li> <p>Improving the generated code by fusing the 4d tiled layout with the   producers and consumers of the <code>linalg.mmt4d</code>.</p> </li> </ul>","tags":["CPU"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/","title":"TFLite support via TOSA","text":"<p>IREE can now execute TensorFlow Lite (TFLite) models through the use of TOSA, an open standard of common tensor operations, and a part of MLIR core. TOSA\u2019s high-level representation of tensor operations provides a common front-end for ingesting models from different frameworks. In this case we ingest a TFLite FlatBuffer and compile it to TOSA IR, which IREE takes as an input format to compile to its various backends.</p> <p></p> <p>Using TFLite as a frontend for IREE provides an alternative ingestion method for already existing models that could benefit from IREE\u2019s design. This enables models already designed for on-device inference to have an alternative path for execution without requiring any additional porting, while benefiting from IREE\u2019s improvements in buffer management, work dispatch system, and compact binary format. With continued improvements to IREE/MLIR\u2019s compilation performance, more optimized versions can be compiled and distributed to target devices without an update to the clientside environment.</p> <p>Today, we have validated floating point support for a variety of models, including mobilenet (v1, v2, and v3) and mobilebert. More work is in progress to support fully quantized models, and TFLite\u2019s hybrid quantization, along with dynamic shape support.</p>","tags":["TensorFlow"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/#examples","title":"Examples","text":"<p>TFLite with IREE is available in Python and Java.  We have a colab notebook that shows how to use IREE\u2019s python bindings and TFLite compiler tools to compile a pre-trained TFLite model from a FlatBuffer and run using IREE.  
We also have an Android Java app that was forked from an existing TFLite demo app, swapping out the TFLite library for our own AAR.  More information on IREE\u2019s TFLite frontend is available here.</p>","tags":["TensorFlow"]},{"location":"developers/","title":"Developers","text":"<p>These pages cover topics useful for project maintainers and contributors.</p> <p>Caution</p> <p>Some of these pages may be stale. Contributions are always welcome!</p>"},{"location":"developers/usage-best-practices/","title":"Usage best practices","text":"<p>This page contains a list of best practices for getting the most out of IREE, spanning model authoring, ahead-of-time compilation, and runtime use. Treat these as a collection of ideas to consider or areas to start benchmarking when working on your own applications.</p>"},{"location":"developers/usage-best-practices/#introduction","title":"Introduction","text":"<p>Common themes include:</p> <ul> <li>Give the compiler as much information as possible</li> <li>Give the compiler opportunities to batch work together or defer computation</li> <li>Keep compute devices saturated with work through pipelining</li> <li>Use dense math where possible, particularly for inner loop bodies</li> <li>Limit synchronization points between devices like CPUs and GPUs</li> <li>Profile early and often, using the right tools for each level of granularity</li> </ul>"},{"location":"developers/usage-best-practices/#practices-for-model-authoring","title":"Practices for model authoring","text":""},{"location":"developers/usage-best-practices/#track-state-within-your-model-when-possible","title":"Track state within your model when possible","text":"<p>If your model is stateful prefer to store that state directly within your program rather than externalizing it through arguments and return values. 
By keeping state inside your program the compiler is better able to reason about it and function calls will have lower overhead.</p> <p>If you do externalize state, try to pack that state into a limited number of arguments.</p> <p>See the variables and state sample for further guidance on tracking and using state.</p>"},{"location":"developers/usage-best-practices/#limit-uses-of-dynamic-shapes","title":"Limit uses of dynamic shapes","text":"<p>While IREE aims to support general dynamic shapes use, it is better able to optimize parts of programs where shapes are static. Slow varying dimensions like batch index or timestamp are safer uses of dynamic shapes than faster varying dimensions like the x/y/channel dimensions of images.</p> <p>See the dynamic shapes sample for further guidance on using dynamic shapes.</p>"},{"location":"developers/usage-best-practices/#practices-for-compilation-settings","title":"Practices for compilation settings","text":"<p>TODO: which compiler targets to use (try both CUDA and Vulkan?)</p> <p>TODO: use the most specific LLVM target triple you can?</p>"},{"location":"developers/usage-best-practices/#tuning-compilation-heuristics","title":"Tuning compilation heuristics","text":"<p>IREE runs its own suite of benchmarks continuously using the definitions at https://github.com/iree-org/iree/tree/main/benchmarks. The flags set for these benchmarks represent the latest manually tuned values for workloads we track closely and referencing them may help with your own search for peak performance. 
You can use these flags in your own explorations, but note that as compiler performance matures, the existing flags will gradually be replaced with attributes for autotuning or command line options for experimental features.</p>"},{"location":"developers/usage-best-practices/#practices-for-runtime-use","title":"Practices for runtime use","text":"<p>TODO: sample code, profile numbers</p>"},{"location":"developers/usage-best-practices/#tuning-runtime-settings","title":"Tuning runtime settings","text":"<p>When running on the CPU, the task system flags specified in iree/task/api.c give control over how worker threads will be created. For example, the <code>--task_topology_group_count=3</code> flag can be set to explicitly run on three workers rather than rely on heuristic selection that defaults to one worker per detected physical core.</p> <p>If running on a single thread or system with no threading support the <code>local-sync</code> HAL driver can be used instead of the multithreaded <code>local-task</code> HAL driver to reduce dependencies and code size. 
When running with the <code>local-sync</code> driver all execution happens inline on the thread invoking the IREE runtime and will block until it has completed.</p>"},{"location":"developers/usage-best-practices/#do-the-minimum-amount-of-work-cache-queries-and-reuse-buffers","title":"Do the minimum amount of work: cache queries and reuse buffers","text":"<p>When using IREE's runtime libraries, try to front-load queries, particularly queries using strings that look up into maps like <code>iree_runtime_session_call_by_name</code>, so that hot sections of code are doing the minimum amount of work: routing inputs through buffers, scheduling runtime calls, and routing outputs through other buffers.</p>"},{"location":"developers/vulkan-environment-setup/","title":"Vulkan environment setup","text":"<p>Vulkan is a new generation graphics and compute API that provides high-efficiency, cross-platform access to modern GPUs used in a wide variety of devices from PCs and consoles to mobile phones and embedded platforms.</p> <p>This page lists steps and tips for setting up and troubleshooting a Vulkan development environment. The information here is meant to be generic.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-architecture","title":"Vulkan architecture","text":"<p>Vulkan adopts a layered architecture, which aims to better support extensibility. There are four components involved in this architecture:</p> <ul> <li>The Vulkan Application</li> <li>The Vulkan Loader</li> <li>Vulkan Layers</li> <li>Installable Client Drivers (ICDs)</li> </ul> <p></p> <p>The Vulkan loader sits between the Vulkan application, which calls Vulkan APIs, and the ICDs, which implement these Vulkan APIs. Vulkan layers augment the Vulkan system to provide optional features like validation and debugging. 
The Vulkan loader composes a chain of requested layers, which processes the Vulkan application's API calls one by one, and finally redirects the API calls made by the Vulkan application to one or more ICDs.</p> <p>It's highly recommended to read the Architecture of the Vulkan Loader Interfaces Overview to get a general understanding of what these components are and how they interact with one another.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-setup_1","title":"Vulkan environment setup","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#windows","title":"Windows","text":"<p>You need to install the Vulkan SDK from LunarG to get the Vulkan loader.</p> <p>Typically the Vulkan SDK will be installed at <code>C:\\VulkanSDK\\&lt;version&gt;\\</code> and there will be an environment variable <code>VULKAN_SDK</code> pointing to it. You can run the <code>vulkancube</code> executable under the <code>Bin\\</code> subdirectory of the Vulkan SDK to make sure everything works properly. 
If not, you probably need to check whether the graphics card is Vulkan capable or update the driver.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#debianubuntu","title":"Debian/Ubuntu","text":"<p>For Ubuntu 20.04/22.04, it's recommended to directly install the full Vulkan SDK from LunarG's APT sources for the loader and various developer tools.</p> <p>If you want to have a minimal environment, the following packages should be installed for a proper Vulkan runtime:</p> <ul> <li><code>libvulkan1</code> for the Vulkan loader <code>libvulkan.so</code>.</li> <li>For AMD GPUs, you can install<ul> <li><code>mesa-vulkan-drivers</code> for the Mesa AMD Vulkan ICD, or</li> <li>AMD's official Vulkan ICD.</li> </ul> </li> <li>For NVIDIA GPUs, you can install<ul> <li><code>nvidia-vulkan-icd</code> on Debian for NVIDIA Vulkan ICD.</li> <li>the most recent <code>nvidia-driver-*</code> package on Ubuntu for NVIDIA Vulkan ICD.</li> </ul> </li> </ul> <p>The above packages provide the Vulkan loader and ICDs. With them a Vulkan application should be able to run. You may additionally want to install</p> <ul> <li>vulkan-tools for command-line tools like <code>vulkaninfo</code>   (dumping available ICDs and their capabilities) and GUI applications like   <code>vulkancube</code> (rendering a rotating cube).</li> </ul> <p>In order to develop Vulkan applications, you additionally need the following packages:</p> <ul> <li>libvulkan-dev for various Vulkan header files.</li> <li>vulkan-validationlayers for Vulkan validation     layers like <code>VkLayer_standard_validation</code>.</li> </ul>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#linux","title":"Linux","text":"<p>For other Linux distros, please consult the corresponding package management tools for the packages needed. (And please feel free to update this doc regarding them.)</p> <p>You can also download and install the Vulkan SDK tarball from LunarG. 
It packages the loader with many useful layers and other shader tools.</p> <p>You can also build the Vulkan SDK component projects like Vulkan-Loader and Vulkan-ValidationLayers from source. But note that when building these components separately, you need to make sure they are consistent with one another (e.g., using the same version of Vulkan headers) to function together.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#android","title":"Android","text":"<p>Please make sure your Android device is Vulkan capable. Vulkan is supported on Android since 7, but we track newer Android versions (10+) closely and haven't set a clear min version yet.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#multiple-vulkan-sdks","title":"Multiple Vulkan SDKs","text":"<p>If multiple versions of the Vulkan loader exist, you may also need to set <code>LD_LIBRARY_PATH</code> and <code>LD_PRELOAD</code> to load the desired version of the loader. For example:</p> <pre><code>LD_LIBRARY_PATH={PATH_TO_VULKAN_SDK}/x86_64/lib/\nLD_PRELOAD=libvulkan.so.1\n</code></pre> <p>This can also be done by sourcing the proper <code>setup-env.sh</code> from one of the downloaded Vulkan SDKs.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-troubleshooting","title":"Vulkan environment troubleshooting","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#useful-environment-variables","title":"Useful environment variables","text":"<p>There are a few environment variables that can alter the default Vulkan loader behavior and print verbose information, notably:</p> <ul> <li><code>VK_LOADER_DEBUG</code>: enable loader debug messages. Setting it to <code>all</code> will     enable the most verbose logging from the loader. 
This is especially useful     when trying to see what layers/ICDs are searched and used.</li> <li><code>VK_ICD_FILENAMES</code>: force the loader to use a specific ICD. This is     especially useful when you have multiple Vulkan capable devices and want to     select which one to use manually.</li> <li><code>VK_INSTANCE_LAYERS</code>: force the loader to enable the given layers. For     example, you can force enable <code>VK_LAYER_LUNARG_api_dump</code> to have a detailed     dump of all Vulkan API calls made by the application. You can force enable     <code>VK_LAYER_LUNARG_core_validation</code> to validate the API calls made by the     application.</li> <li><code>VK_LAYER_PATH</code>: override the loader's standard layer library search folders.</li> </ul> <p>Please see the Vulkan loader's documentation for detailed explanation for these variables.</p>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#setting-environment-variables-for-bazel-test","title":"Setting environment variables for Bazel test","text":"<p>Bazel runs tests in a sandbox and environment variables must be passed through to the test runner. Consider putting environment setup in a <code>user.bazelrc</code> to save typing. For example:</p> <pre><code>test --test_env=\"LD_LIBRARY_PATH=/absolute/path/to/vulkan/sdk/x86_64/lib/\"\ntest --test_env=\"LD_PRELOAD=libvulkan.so.1\"\ntest --test_env=\"VK_LAYER_PATH=/absolute/path/to/additional/layers/:$VK_LAYER_PATH\"\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-function-vkcreateinstance-not-available-on-android","title":"Vulkan function <code>vkCreateInstance</code> not available on Android","text":"<p>Since Android 8 Oreo, Android re-architected the OS framework with project Treble. Framework libraries and vendor libraries have a more strict and clear separation. Their dependencies are carefully scrutinized and only selected cases are allowed. 
This is enforced with linker namespaces.</p> <p><code>/data/local/tmp</code> is the preferred directory for automating native binary tests built using NDK toolchain. They should be allowed to access libraries like <code>libvulkan.so</code> for their functionality. However, there was an issue with fully treblized Android 10 where <code>/data/local/tmp</code> did not have access to the linker namespaces needed by <code>libvulkan.so</code>. This should be fixed now. But as is typical in the Android system, it takes a long time to see the fix getting propagated, if ever.</p> <p>A known workaround is to symlink the vendor Vulkan implementation under <code>/vendor/lib[64]</code> as <code>libvulkan.so</code> under <code>/data/local/tmp</code> and use <code>LD_LIBRARY_PATH=/data/local/tmp</code> when invoking IREE executables.</p> <p>For Qualcomm Adreno GPUs, the vendor Vulkan implementation is at <code>/vendor/lib[64]/hw/vulkan.*.so</code>. So for example for Snapdragon 865:</p> <pre><code>adb shell ln -s /vendor/lib64/hw/vulkan.kona.so /data/local/tmp/libvulkan.so\n</code></pre> <p>For ARM Mali GPUs, there is only one monolithic driver (<code>/vendor/lib[64]/libGLES_mali.so</code>) for OpenGL and Vulkan and the Vulkan vendor driver (<code>/vendor/lib[64]/hw/vulkan.*.so</code>) is just a symlink to it. So for example:</p> <pre><code>adb shell ln -s /vendor/lib64/libGLES_mali.so /data/local/tmp/libvulkan.so\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#ssh-on-linux-and-x-forwarding","title":"SSH on Linux and X forwarding","text":"<p>Physical devices enumerated on NVIDIA drivers can be affected by the <code>DISPLAY</code> environment variable. 
If you are running under an SSH session to Linux or using chrome remote desktop and have problems with physical device enumeration, you probably want to check the <code>DISPLAY</code> environment and set it to point to a display at the server side, for example:</p> <pre><code>export DISPLAY=:0\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/building/bazel/","title":"Building with Bazel","text":"<p>This page walks through building IREE from source using the Bazel build system.</p> <p>Warning</p> <p>Bazel build support is primarily for internal project infrastructure. We strongly recommend using CMake instead.</p> <p>Our Bazel configuration is also only tested on Linux. Windows and macOS may be unstable.</p>"},{"location":"developers/building/bazel/#prerequisites","title":"Prerequisites","text":"Linux macOS Windows <ol> <li> <p>Install Bazel, matching IREE's     <code>.bazelversion</code>     by following the     official docs.</p> </li> <li> <p>Install a compiler such as Clang (GCC is not fully supported).</p> <pre><code>sudo apt install clang\n</code></pre> <p>Set environment variables for Bazel:</p> <pre><code>export CC=clang\nexport CXX=clang++\n</code></pre> </li> <li> <p>Install Python build requirements:</p> <pre><code>python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre> </li> </ol> <ol> <li> <p>Install Homebrew:</p> <pre><code>/bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)\"\n</code></pre> </li> <li> <p>Install Bazel, matching IREE's     <code>.bazelversion</code>     by following the official docs or     via Homebrew:</p> <pre><code>brew install bazel\n</code></pre> </li> <li> <p>Install Python build requirements:</p> <pre><code>python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre> </li> </ol> <p>Tip</p> <p>You can simplify installation by using a package manager like Scoop or Chocolatey.</p> <ol> 
<li> <p>Install Bazel, matching IREE's     <code>.bazelversion</code>     by following the official docs.</p> <p>Also install MSYS2 by following Bazel's documentation.</p> </li> <li> <p>Install Python3 (docs here)     and Python build requirements:</p> <pre><code>python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n</code></pre> </li> <li> <p>Install the full Visual Studio or \"Build Tools For Visual Studio\" from the     downloads page then     set the <code>BAZEL_VS</code> environment variable:</p> <pre><code>&gt; $env:BAZEL_VS = \"C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\"\n</code></pre> </li> </ol>"},{"location":"developers/building/bazel/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":""},{"location":"developers/building/bazel/#clone","title":"Clone","text":"<p>Use Git to clone the IREE repository and initialize its submodules:</p> <pre><code>git clone https://github.com/iree-org/iree.git\ncd iree\ngit submodule update --init\n</code></pre> <p>Configure Bazel:</p> <pre><code># This generates a `configured.bazelrc` file by analyzing your environment.\n# Skipping this step will make it difficult to select your platform/compiler.\npython3 configure_bazel.py\n</code></pre>  Linux macOS Windows <p>(No Linux-specific tips for configuring)</p> <p>(No macOS-specific tips for configuring)</p> <p>Tip</p> <p>Clone to a short path like <code>C:\\projects\\</code> to avoid issues with Windows maximum path lengths (260 characters).</p> <p>Tip</p> <p><code>configure_bazel.py</code> only detects that you have Windows and will output the default <code>--config=windows</code> to <code>configured.bazelrc</code>, which assumes the latest version of MSVC. 
To avoid some warnings, you may want to replace it with (for example) <code>--config=msvc2022</code>.</p>"},{"location":"developers/building/bazel/#build","title":"Build","text":"<p>Run all tests:</p> <pre><code>bazel test -k //...\n</code></pre> <p>Run all tests except those that require CUDA:</p> <pre><code>bazel test -k //... \\\n    --iree_drivers=local-sync,local-task,vulkan \\\n    --test_tag_filters=\"-driver=cuda,-target=cuda\" \\\n    --build_tag_filters=\"-driver=cuda,-target=cuda\"\n</code></pre> <p>Run all tests except those that require a GPU (any API):</p> <pre><code>bazel test -k //... \\\n    --iree_drivers=local-sync,local-task,vulkan \\\n    --test_tag_filters=\"-driver=vulkan,-driver=metal,-driver=cuda,-target=cuda\" \\\n    --build_tag_filters=\"-driver=cuda,-target=cuda\"\n</code></pre> <p>Tip</p> <p>See the <code>build_tools/bazel/build_test_all.sh</code> script for examples of other flags and environment variables that can be used to configure what Bazel runs.</p> <p>In general, build artifacts will be under the <code>bazel-bin</code> directory at the top level.</p>"},{"location":"developers/building/bazel/#recommended-userbazelrc","title":"Recommended <code>user.bazelrc</code>","text":"<p>You can put a user.bazelrc at the root of the repository and it will be ignored by git.</p>  Linux macOS Windows <pre><code>build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. 
This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n</code></pre> <pre><code>build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n</code></pre> <pre><code>build --disk_cache=c:/bazelcache\nbuild:debug --compilation_mode=dbg --copt=/O2 --per_file_copt=iree@/Od --strip=never\n</code></pre>"},{"location":"developers/building/bazel/#whats-next","title":"What's next?","text":""},{"location":"developers/building/bazel/#take-a-look-around","title":"Take a Look Around","text":"<p>Build all of IREE's 'tools' directory:</p> <pre><code>bazel build tools/...\n</code></pre> <p>Check out what was built:</p> <pre><code>ls bazel-bin/tools/\n./bazel-bin/tools/iree-compile --help\n</code></pre> <p>Translate a MLIR file and execute a function in the compiled module:</p> <pre><code># iree-run-mlir &lt;compiler flags&gt; [input.mlir] &lt;runtime flags&gt;\n$ ./bazel-bin/tools/iree-run-mlir \\\n  --iree-hal-target-backends=vmvx --print-mlir \\\n  ./samples/models/simple_abs.mlir \\\n  --input=f32=-2\n</code></pre>"},{"location":"developers/building/cmake-options/","title":"CMake options","text":""},{"location":"developers/building/cmake-options/#frequently-used-cmake-options","title":"Frequently-used CMake options","text":""},{"location":"developers/building/cmake-options/#cmake_build_type","title":"<code>CMAKE_BUILD_TYPE</code>","text":"<ul> <li>type: STRING</li> </ul> <p>Sets the build type. 
Possible values are <code>Release</code>, <code>Debug</code>, <code>RelWithDebInfo</code> and <code>MinSizeRel</code>. If unset, build type is set to <code>Release</code>.</p>"},{"location":"developers/building/cmake-options/#cmake_lang_compiler","title":"<code>CMAKE_&lt;LANG&gt;_COMPILER</code>","text":"<ul> <li>type: STRING</li> </ul> <p>This is the command that will be used as the <code>&lt;LANG&gt;</code> compiler, which are <code>C</code> and <code>CXX</code> in IREE. These variables are set to compile IREE with <code>clang</code> or rather <code>clang++</code>. Once set, these variables can not be changed.</p>"},{"location":"developers/building/cmake-options/#iree-specific-cmake-options","title":"IREE-specific CMake options","text":"<p>This gives a brief explanation of IREE specific CMake options and variables.</p>"},{"location":"developers/building/cmake-options/#iree_enable_runtime_tracing","title":"<code>IREE_ENABLE_RUNTIME_TRACING</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enables instrumented runtime tracing. Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_enable_compiler_tracing","title":"<code>IREE_ENABLE_COMPILER_TRACING</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enables instrumented compiler tracing. This requires that <code>IREE_ENABLE_RUNTIME_TRACING</code> also be set. Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_compiler","title":"<code>IREE_BUILD_COMPILER</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE compiler. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_tests","title":"<code>IREE_BUILD_TESTS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds IREE unit tests. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_docs","title":"<code>IREE_BUILD_DOCS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds IREE documentation files. 
Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_samples","title":"<code>IREE_BUILD_SAMPLES</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds IREE sample projects. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_python_bindings","title":"<code>IREE_BUILD_PYTHON_BINDINGS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE python bindings. Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite","title":"<code>IREE_BUILD_BINDINGS_TFLITE</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE TFLite C API compatibility shim. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite_java","title":"<code>IREE_BUILD_BINDINGS_TFLITE_JAVA</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds the IREE TFLite Java bindings with the C API compatibility shim. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_build_experimental_remoting","title":"<code>IREE_BUILD_EXPERIMENTAL_REMOTING</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Builds experimental remoting component. 
Defaults to <code>OFF</code>.</p>"},{"location":"developers/building/cmake-options/#iree_hal_driver_defaults","title":"<code>IREE_HAL_DRIVER_DEFAULTS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Default setting for each <code>IREE_HAL_DRIVER_*</code> option.</p>"},{"location":"developers/building/cmake-options/#iree_hal_driver_","title":"<code>IREE_HAL_DRIVER_*</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Individual options enabling the build for each runtime HAL driver.</p>"},{"location":"developers/building/cmake-options/#iree_target_backend_defaults","title":"<code>IREE_TARGET_BACKEND_DEFAULTS</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Default setting for each <code>IREE_TARGET_BACKEND_*</code> option.</p>"},{"location":"developers/building/cmake-options/#iree_target_backend_","title":"<code>IREE_TARGET_BACKEND_*</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Individual options enabling the build for each compiler target backend.</p>"},{"location":"developers/building/cmake-options/#iree_input_","title":"<code>IREE_INPUT_*</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Individual options enabling each set of input dialects.</p>"},{"location":"developers/building/cmake-options/#iree_output_format_c","title":"<code>IREE_OUTPUT_FORMAT_C</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enables the vm-c compiler output format, using MLIR EmitC. Defaults to <code>ON</code>.</p>"},{"location":"developers/building/cmake-options/#iree_dev_mode","title":"<code>IREE_DEV_MODE</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Configure settings to optimize for IREE development (as opposed to CI or release). Defaults to <code>OFF</code>. For example, this will downgrade some compiler diagnostics from errors to warnings.</p>"},{"location":"developers/building/cmake-options/#iree_enable_lld","title":"<code>IREE_ENABLE_LLD</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Use lld when linking. Defaults to <code>OFF</code>. 
This option is equivalent to <code>-DIREE_USE_LINKER=lld</code>. The option <code>IREE_ENABLE_LLD</code> and <code>IREE_USE_LINKER</code> can not be set at the same time.</p>"},{"location":"developers/building/cmake-options/#iree_enable_asan","title":"<code>IREE_ENABLE_ASAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable address sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#iree_enable_msan","title":"<code>IREE_ENABLE_MSAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable memory sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#iree_enable_tsan","title":"<code>IREE_ENABLE_TSAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable thread sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#iree_enable_ubsan","title":"<code>IREE_ENABLE_UBSAN</code>","text":"<ul> <li>type: BOOL</li> </ul> <p>Enable undefined behavior sanitizer if the current build type is Debug and the compiler supports it.</p>"},{"location":"developers/building/cmake-options/#cross-compilation","title":"Cross-compilation","text":"<p>When cross compiling (using a toolchain file like <code>android.toolchain.cmake</code>), first build and install IREE's tools for your host configuration, then use the <code>IREE_HOST_BIN_DIR</code> CMake option to point the cross compiled build at the host tools.</p>"},{"location":"developers/building/cmake-with-ccache/","title":"CMake with <code>ccache</code>","text":"<p><code>ccache</code> is a compilation cache. In principle, just prepending compiler invocations with <code>ccache</code> is all one needs to enable it, e.g.</p> <pre><code>ccache clang foo.c -c -o foo.o\n</code></pre> <p>takes care of executing <code>clang</code> with these arguments and caches the output file <code>foo.o</code>. 
The next invocation then skips executing <code>clang</code> altogether.</p> <p>When the cache is hit, the speedup is such that the \"compilation\" becomes essentially free. However, <code>ccache</code> only caches compilation, not linking.</p> <p>Here are a few scenarios where <code>ccache</code> helps:</p> <ul> <li>Incremental rebuilds. While <code>cmake</code> always tries to avoid unnecessary work in   incremental rebuilds, it can only make simple decisions based on file   timestamps. <code>ccache</code> sees deeper: if the raw source code isn't readily   a cache hit, it will then try again after preprocessing and discarding   comments.</li> <li>One pain point with <code>cmake</code> is having to start over from a clean build   directory from time to time, which by default means paying again the full cost   of a cold build. Thankfully <code>ccache</code> keeps its cache outside of any <code>cmake</code>   build directory, so the first build in the new clean build directory may be   very fast.</li> </ul>"},{"location":"developers/building/cmake-with-ccache/#installing-and-setting-up-ccache","title":"Installing and setting up <code>ccache</code>","text":"<p><code>ccache</code> is available on most platforms. On Debian-based Linux distributions, do:</p> <pre><code>sudo apt install ccache\n</code></pre> <p>The one <code>ccache</code> setting that you probably need to configure is the maximum cache size. The default <code>5G</code> is too small for our purposes. To set the cache max size, do this once:</p> <pre><code>ccache --max-size=20G\n</code></pre> <p>Tip: At the moment (late 2020), most of the code we're building is <code>third_party/llvm-project</code> so the fundamental limiting factor to how far we can cache away rebuilds is how often that dependency gets updated. 
Given how frequently it currently is updated, I'm finding that <code>20G</code> is enough to make the <code>ccache</code> size not be the limiting factor.</p>"},{"location":"developers/building/cmake-with-ccache/#telling-cmake-to-use-ccache","title":"Telling CMake to use <code>ccache</code>","text":"<p>Use the CMake COMPILER_LAUNCHER functionality by setting <code>CMAKE_C_COMPILER_LAUNCHER=ccache</code> and <code>CMAKE_CXX_COMPILER_LAUNCHER=ccache</code> in your</p> <p>Notes:</p> <ul> <li>This approach only works with the <code>Ninja</code> and <code>Makefile</code> generators   (<code>cmake -G</code> flag). When using other generators, another approach is needed,   based on wrapping the compiler in a script that prepends <code>ccache</code>. See this   article.</li> </ul>"},{"location":"developers/building/cmake-with-ccache/#ensuring-that-ccache-is-used-and-monitoring-cache-hits","title":"Ensuring that <code>ccache</code> is used and monitoring cache hits","text":"<p>The <code>ccache -s</code> command dumps statistics, including a cache hit count and ratio. It's convenient to run periodically with <code>watch</code> in a separate terminal:</p> <pre><code>watch -n 0.1 ccache -s  # update the stats readout every 0.1 seconds\n</code></pre>"},{"location":"developers/building/emscripten/","title":"Building with Emscripten","text":"<p>Emscripten is a complete compiler toolchain to WebAssembly, using LLVM, with a special focus on speed, size, and the Web platform. Emscripten can be used to compile parts of IREE to WebAssembly for execution within web browsers or other Wasm runtimes.</p>","tags":["Web"]},{"location":"developers/building/emscripten/#status","title":"Status","text":"<p>IREE's runtime can be compiled through Emscripten in some limited configurations. More of the runtime will be supported over time.</p> <p>IREE's compiler can be compiled through Emscripten with local changes. 
More work is needed for this to be generally supported.</p>","tags":["Web"]},{"location":"developers/building/emscripten/#prerequisites","title":"Prerequisites","text":"<p>Read https://emscripten.org/docs/getting_started/downloads.html and run</p> <pre><code>./emsdk install latest\n./emsdk activate latest\nsource ./emsdk_env.sh\n</code></pre>","tags":["Web"]},{"location":"developers/building/emscripten/#building-irees-runtime-with-emscripten","title":"Building IREE's runtime with Emscripten","text":"","tags":["Web"]},{"location":"developers/building/emscripten/#host-configuration","title":"Host configuration","text":"<p>Build and install at least the compiler tools on your host machine, or install them from a binary distribution:</p> <pre><code>$ cmake -G Ninja -B ../iree-build-host/ \\\n    -DCMAKE_C_COMPILER=clang \\\n    -DCMAKE_CXX_COMPILER=clang++ \\\n    -DCMAKE_INSTALL_PREFIX=../iree-build-host/install \\\n    .\n$ cmake --build ../iree-build-host/ --target install\n</code></pre>","tags":["Web"]},{"location":"developers/building/emscripten/#target-configuration","title":"Target configuration","text":"<pre><code>$ emcmake cmake -G Ninja -B ../iree-build-emscripten/ \\\n  -DCMAKE_BUILD_TYPE=Release \\\n  -DIREE_HOST_BIN_DIR=$(realpath ../iree-build-host/install/bin) \\\n  -DIREE_BUILD_TESTS=OFF \\\n  -DIREE_BUILD_COMPILER=OFF \\\n  .\n</code></pre> <p>Build:</p> <pre><code>cmake --build ../iree-build-emscripten/ \\\n  --target iree_samples_simple_embedding_simple_embedding_vmvx_sync\n</code></pre>","tags":["Web"]},{"location":"developers/building/emscripten/#load-into-a-webassembly-environment","title":"Load into a WebAssembly environment","text":"<p>Copy the outputs from the build process (e.g. 
<code>simple_embedding_vmvx_sync.js</code> and <code>simple_embedding_vmvx_sync.wasm</code>) into your application and follow instructions at either https://webassembly.org/getting-started/developers-guide/ or https://developer.mozilla.org/en-US/docs/WebAssembly/Loading_and_running.</p>","tags":["Web"]},{"location":"developers/debugging/android-with-lldb/","title":"Android LLDB debugging","text":"<p>This doc shows how to use LLDB to debug native binaries on Android. For a more complete explanation, see the official LLDB documentation on remote debugging.</p>","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#prerequisites","title":"Prerequisites","text":"<p>We assume the following setup:</p> <ol> <li>Android NDK is installed and    the <code>ANDROID_NDK</code> environment variable is set to the installation path.</li> <li>Your Android device connected and configured for    <code>adb</code>.</li> <li>The Android binary of interest is already compiled and the command to run it    (in <code>adb shell</code>) is <code>&lt;your-binary&gt; [program args...]</code>. This does not have    to be a proper Android app with a manifest, etc.</li> </ol>","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#running-manually","title":"Running Manually","text":"<ol> <li> <p>Push the toolchain files, including <code>lldb-server</code>, to your device:</p> <pre><code>adb shell \"mkdir -p /data/local/tmp/tools\"\nadb push \"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/aarch64/* /data/local/tmp/tools\n</code></pre> <p>You may need to adjust the clang toolchain version to match the one in your NDK. You can find it with <code>find \"$ANDROID_NDK/toolchains/llvm/prebuilt\" -name lldb-server</code>.</p> </li> <li> <p>Set up port forwarding. 
We are going to use port 5039 but you are free to    pick a different one:</p> <pre><code>adb forward tcp:5039 tcp:5039\n</code></pre> </li> <li> <p>Start an <code>lldb-server</code> in a new interactive adb shell:</p> <pre><code>adb shell\n/data/local/tmp/tools/lldb-server platform --listen '*:5039' --server\n</code></pre> </li> <li> <p>Launch <code>lldb</code>, connect to the server and run the binary:</p> <pre><code>lldb -o 'platform select remote-android' \\\n    -o 'platform connect connect://:5039' \\\n    -o 'platform shell cd /data/local/tmp'\ntarget create &lt;your-binary&gt;\nrun [program args...]\n</code></pre> <p>You can either use the system <code>lldb</code> or a prebuilt under <code>\"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/&lt;your-host-arch&gt;</code>.</p> <p>Explanation: each <code>-o</code> (short for <code>--one-shot</code>) tells lldb to execute a command on startup. You can run those manually in the lldb shell, if you prefer. Then, we tell lldb which working directory to use, where to find the executable, and what command line arguments to use.</p> </li> </ol>","tags":["Android"]},{"location":"developers/debugging/compile-time-regressions/","title":"Compile time regression debugging","text":"<p>So the IREE compiler used to compile a program quickly, but it is now slower. What do you do?</p>"},{"location":"developers/debugging/compile-time-regressions/#initial-information-gathering","title":"Initial information gathering","text":"<p>Try to answer as many of these questions as you can:</p> <p>When did compilation get slower?</p> <p>A specific git commit is ideal, but \"sometime in the last week\" is a good   starting point. You'll ultimately want to find a culprit release or git   commit that changed the compiler code.</p> <p>How much slower did compilation get?</p> <p>Be specific - did it jump from 1 minute to 2 minutes, or 1 minute to 1 hour?   
Identifying the scale of the regression can help set the priority to   investigate it.</p> <p>What is the full compile command?</p> <p>Try to extract the input program and full list of flags passed to the   compiler binary so that others can reproduce what you're seeing. Try to   distill this as much as possible to using just native tools (no Python or   other framework layers).</p> <p>What environment is the compiler running in?</p> <p>Are you using a <code>Debug</code> build, or a release build? What operating system and   size machine is running the compiler (e.g. Linux developer machine, or a   smaller system)?</p>"},{"location":"developers/debugging/compile-time-regressions/#culprit-finding-and-bisecting","title":"Culprit finding and bisecting","text":"<p>If you only have a rough idea of when something changed and want to narrow that down to a specific code change, bisecting can help.</p>"},{"location":"developers/debugging/compile-time-regressions/#running-git-bisect","title":"Running <code>git bisect</code>","text":"<p>Building the compiler from source and using <code>git bisect</code> will let you pinpoint specific commits in IREE, though it typically won't let you step through changes in submodules (e.g. MLIR updates in <code>third_party/llvm-project/</code>).</p> <p>Tip: Configure ccache if you'll be rebuilding the compiler while bisecting</p> <p>A manual workflow with <code>git bisect</code> looks like this:</p> <pre><code>git bisect start --first-parent\ngit bisect good [&lt;rev&gt;]\ngit bisect bad [&lt;rev&gt;]\n\n# Read the prompts from the command as it runs\n# At each step, test the compiler:\n#   git submodule update\n#   cmake --build build/ --target iree-compile\n#   ./build/tools/iree-compile &lt;args&gt;\n#       attach Tracy, observe timing, print IR, etc. 
to determine if fast or slow\n#       if fast, `git bisect good`\n#       if slow, `git bisect bad`\n#   repeat\n</code></pre> <p>An automated workflow can use <code>git bisect run</code> and a script:</p> <pre><code># run_bisect.sh\ngit submodule update\ncmake --build build/ --target iree-compile\n# Other logic here\n</code></pre> <pre><code>git bisect start --first-parent\ngit bisect good [&lt;rev&gt;]\ngit bisect bad [&lt;rev&gt;]\ngit bisect run run_bisect.sh\n</code></pre>"},{"location":"developers/debugging/compile-time-regressions/#sample-compile-executable-sources-individually-with-a-timeout","title":"Sample: compile executable sources individually with a timeout","text":"<pre><code>#!/bin/bash\n\nset -xeuo pipefail\n\n# --------------------------------------------------------------------------- #\n# Settings                                                                    #\n# --------------------------------------------------------------------------- #\n\nINPUT_FILE_PATH=\"/path/to/program.mlirbc\"\nTMP_DIR=\"../iree-tmp\"\n\ndeclare -a COMPILER_FLAGS=(\n  \"--iree-input-type=stablehlo\"\n  \"--iree-hal-target-backends=cuda\"\n  \"--iree-hal-cuda-llvm-target-arch=sm_80\"\n)\n\nTIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE=10\n\n# --------------------------------------------------------------------------- #\n# Utility functions                                                           #\n# --------------------------------------------------------------------------- #\n\n# Call to have `git bisect` skip this commit (don't mark as good _or_ bad)\n# https://git-scm.com/docs/git-bisect#_bisect_run\nskip_on_error() {\n  &gt;&amp;2 echo \"** Skipping due to error: $1 **\"\n  exit 125  # Special exit code for `git bisect skip`\n}\n\n# --------------------------------------------------------------------------- #\n# Main script                                                                 #\n# 
--------------------------------------------------------------------------- #\n\n# Store git version hash, so we can dump artifacts to unique directories later.\nGIT_SHA=\"$(git rev-parse --short HEAD)\"\n\necho \"** Building iree-compile at ${GIT_SHA} **\"\n\n# The `git bisect` command only checks out a commit, so update submodules.\ngit submodule update\n\n# Build the compiler. You'll want ccache configured to make this fast!\ncmake --build ../iree-build/ --target iree-compile || skip_on_error \"CMake build failed\"\n\n# Run the compiler, dumping executable sources and stopping.\nSOURCES_DIR=\"${TMP_DIR}/sources-${GIT_SHA}\"\necho \"** Running iree-compile at ${GIT_SHA}, dumping sources to ${SOURCES_DIR} **\"\n../iree-build/tools/iree-compile \\\n    ${INPUT_FILE_PATH} \\\n    ${COMPILER_FLAGS[@]} \\\n    --iree-hal-dump-executable-sources-to=${SOURCES_DIR} \\\n    --compile-to=executable-sources \\\n    -o /dev/null\n\n# Run the compiler again on each executable individually.\necho \"** Running iree-compile at ${GIT_SHA} for each executable source **\"\nSOURCES=($(ls -1 ${SOURCES_DIR}))\nfor SOURCE in \"${SOURCES[@]}\"; do\n  echo \"  * Compiling: ${SOURCE} *\"\n  timeout --verbose ${TIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE} \\\n   ../iree-build/tools/iree-compile ${SOURCES_DIR}/${SOURCE} \\\n    ${COMPILER_FLAGS[@]} \\\n    --compile-mode=hal-executable \\\n    -o /dev/null\ndone\n</code></pre>"},{"location":"developers/debugging/compile-time-regressions/#profiling-and-tracing","title":"Profiling and tracing","text":"<p>If you want to understand why the compiler is fast or slow, or if you want to compare performance in detail between two versions, consider these profiling options.</p>"},{"location":"developers/debugging/compile-time-regressions/#mlir-pass-timing","title":"MLIR pass timing","text":"<p>The <code>-mlir-timing</code> flag enables Pass Timing instrumentation. 
Once the compiler finishes running, this prints a report like</p> <pre><code>===-------------------------------------------------------------------------===\n                      ... Pass execution timing report ...\n===-------------------------------------------------------------------------===\n  Total Execution Time: 0.0203 seconds\n\n   ---Wall Time---  --- Name ---\n   0.0047 ( 55.9%)  Canonicalizer\n   0.0019 ( 22.2%)  VerifierPass\n   0.0016 ( 18.5%)  LLVMLoweringPass\n   0.0003 (  3.4%)  CSE\n   0.0002 (  1.9%)  (A) DominanceInfo\n   0.0084 (100.0%)  Total\n</code></pre> <p>This is easy data to collect, especially remotely over SSH, but it might not paint a complete picture and requires waiting for compilation to finish.</p>"},{"location":"developers/debugging/compile-time-regressions/#using-tracy","title":"Using Tracy","text":"<p>See our documentation on profiling with Tracy, in particular the section on tracing <code>iree-compile</code>. For compile time regressions, pay particular attention to the compilation phases (Flow/Stream/HAL), how many times <code>TranslateExecutablesPass</code> runs, and if there are outlier passes that take significantly longer to run than others.</p> <p>Here are some previous analyses for inspiration:</p> <ul> <li>https://github.com/iree-org/iree/issues/12033</li> <li>https://github.com/iree-org/iree/issues/12035</li> <li>https://github.com/iree-org/iree/issues/12183</li> <li>https://github.com/iree-org/iree/issues/13189</li> </ul> <p>Example slow trace:</p> <p></p> <p>Example fast trace:</p> <p></p> <p>Example sampling statistics showing 10s of minutes in LLVM codegen:</p> <p></p>"},{"location":"developers/debugging/compile-time-regressions/#using-perf-and-pprof","title":"Using <code>perf</code> and <code>pprof</code>","text":"<p>These linux tools allow for fine-grained profiling. 
Below we present a list of steps to profile <code>iree-compile</code> and visualize the results as a flame graph.</p> <ol> <li> <p>Compile IREE tools with debug information (line tables at minimum) and frame    pointers. You can do that by selecting the <code>RelWithDebInfo</code> build type and    adding <code>-fno-omit-frame-pointer</code> to your compiler flags:</p> <pre><code>cmake &lt;Your-CMAKE-Flags&gt; \\\n   -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n   -DCMAKE_CXX_FLAGS=\"-fno-omit-frame-pointer\" \\\n   -DCMAKE_C_FLAGS=\"-fno-omit-frame-pointer\"\n</code></pre> </li> <li> <p>Set perf event scope/access to the appropriate level with    <code>perf_event_paranoid</code>.</p> <pre><code>echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid\n</code></pre> </li> <li> <p>Run <code>iree-compile</code> under the <code>perf</code> profiler and collect profile data. This    requires <code>sudo</code>.</p> <pre><code>sudo perf record -F 999 -g -- tools/iree-compile &lt;Your-Compile-Arguments&gt;\nsudo chown \"$USER:$USER\" perf.data\n</code></pre> </li> <li> <p>Use <code>pprof</code> to process <code>perf.data</code> from the previous step and start a local    http server with the visualized profile. See the    <code>pprof</code>'s README for installation    instructions and make sure to build    <code>perf_data_converter</code> and    add it to your <code>PATH</code>.</p> <pre><code>pprof -http ':' perf.data\n</code></pre> </li> </ol>"},{"location":"developers/debugging/compile-time-regressions/#stepping-through-compiler-ir","title":"Stepping through compiler IR","text":"<p>Debugging an MLIR-based compiler like IREE usually involves reading IR at some point. 
For compile time regressions, it helps to snapshot the IR at a few key phases and look for differences between fast compilation and slow compilation.</p> <p>Here is one useful flag combination:</p> <pre><code>--mlir-disable-threading \\\n--mlir-elide-elementsattrs-if-larger=8 \\\n--mlir-print-ir-after=iree-hal-materialize-interfaces\n</code></pre>"},{"location":"developers/debugging/gpu/","title":"GPU debugging playbook","text":"<p>This page aims to provide general approaches and practical tips for debugging GPU compiler/runtime correctness/performance issues in IREE.</p> <p>GPUs fundamentally have similar architectures and software stacks. We target GPUs from various vendors using different GPU APIs, but they share quite a lot of common infrastructure in IREE. So the approaches and tips here should be widely applicable.</p> <p>For the ones that are specific to a particular kind of problem/component/GPU, they are prefixed with proper icons to be clear. Here is what those icons represent--</p> Icon Category Correctness Performance AMD HIP/ROCm Apple Metal Microsoft DirectX NVIDIA CUDA Vulkan","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#general-methodology","title":"General methodology","text":"<p>The difficulties associated with debugging typically arise from isolating the problematic component and pinpointing the culprit. Once done, the solution typically derives naturally.</p> <p>There are many components in the IREE stack; hierarchically we can categorize them into either the compiler or runtime bucket:</p> <ul> <li>For compilers, there are multiple layers from the top to the bottom--frontend   input importers, IREE flow/stream compilation, IREE host/device compilation,   GPU backend in LLVM proper or GPU driver compiler for SPIR-V.</li> <li>For runtime, we have fewer layers--IREE HAL drivers, and GPU driver.</li> </ul> <p>Any of the above components/layers can have bugs. 
It's important to reduce the potential surface area to make the problem more tractable.</p> <p>Once we have a more isolated case, the general methodology to pinpoint the exact culprit is to</p> <ol> <li>collect and inspect the symptoms,</li> <li>form hypothesis and run experiments to prove/refute the hypothesis, and</li> <li>iterate.</li> </ol>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#with-shortcuts","title":".. with shortcuts","text":"<p>The above procedure is for facing a large problem with no clue, for example, when bringing up a new model end-to-end via IREE.</p> <p>Though most of the time, we can leverage existing facilities to avoid going down the full top-down hierarchical debugging procedure. For example, for regression happening on an existing model, CI or <code>git bisect</code> might tell us directly the culprit commit.</p>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#using-tools","title":".. using tools","text":"<p>For issues with strong signals like crashing, it's also easier to pinpoint the exact culprit with dedicated tools--we can leverage various sanitizers or debuggers.</p>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#isolating-the-problematic-component","title":"Isolating the problematic component","text":"<p>If we are facing a large problem without a clear clue, we need to isolate the problematic compiler or runtime layer first, typically by comparing with a working solution:</p> <p>[correctness/performance]</p> <p>Sanitize the environment first. 
Asking these questions and making sure the environment is proper can save you hours of debugging sometimes:</p> <ul> <li>Did you recently update the GPU SDK or driver?</li> <li>Are others able to reproduce the issue?</li> <li>If not, what SDK / driver versions are they using?</li> <li>Is your machine drawing enough power when benchmarking?</li> <li>Is your machine connected with a monitor (e.g., for Vulkan)?</li> <li>How long since you last rebooted your machine? \ud83d\udc7b</li> </ul> <p>[correctness/performance]</p> <p>We have multiple GPU targets/drivers in IREE--LLVMGPU/CUDA, LLVMGPU/HIP, SPIR-V/Vulkan, SPIR-V/Metal.</p> <p>For the same GPU, we typically have two paths to target, e.g., CUDA/HIP or Vulkan for NVIDIA/AMD GPUs, Metal or Vulkan for Apple GPUs.</p> <p>If one path is correct/performant, we can diff against it to try isolate the problem--the common/shared compiler/runtime code is likely okay; what differs between paths is likely problematic.</p> <p>[correctness/performance] [vulkan]</p> <p>Vulkan supports different GPUs. Similarly, if one GPU gives correct/performant result, we diff against it to find clues.</p> <p>Even more code in compiler/runtime are shared here; what's problematic is likely different capabilities triggering different CodeGen pipelines so revealing bugs in a particular CodeGen pipeline. Or there are driver issues from a particular vendor.</p> <p>[correctness]</p> <p>If the CPU is working properly, we can use the same dispatch region formation and diff against the CPU dispatches one by one to isolate the problem. See this issue as an example.</p> <p>[correctness]</p> <p><code>--iree-flow-trace-dispatch-tensors</code> and/or <code>--iree-flow-break-dispatch=</code> to <code>iree-compile</code> is quite helpful to inspect the output after all/each dispatch(es).</p> <p>[correctness]</p> <p><code>iree-reduce</code> is a great tool to reduce and isolate issues programmatically. 
See more details here.</p>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#pinpointing-compiler-issues","title":"Pinpointing compiler issues","text":"<p>Once we identified that the problem is due to some compiler issue, we can investigate by comparing with different paths and inputs:</p> <p>[correctness]</p> <p>For the same dispatch, we may have different CodeGen pipelines, e.g., for matmul we can have simple SIMT pipeline or using tensor/matrix cores. We can try to switch between different pipelines to isolate the problem.</p> <p>[correctness]</p> <p>Assuming we have a small repro, we can also try to see if there are \"patterns\" in the wrong result (e.g., this issue). Or mutate the input to see if the failure has some \"consistency\".</p> <p>[correctness/performance]</p> <p><code>--mlir-print-ir-*</code> and <code>--debug*</code> to <code>iree-opt</code> is our best friend. Sometimes it just takes eyeballing the IRs between stages to find clues.</p> <p>[performance]</p> <p>For identifying performance issues, we typically need to use:</p> <ul> <li>Tracy profiling to get a   coarse-grained command-buffer timing to understand what's the most   time-consuming kernels.   Typical big performance issues include but are not limited to going down an   incorrect CodeGen pipeline, missing tiling/vectorization, having an   improper tiling/vectorization configuration, and so on.   If the coarse-grained information is not enough, then we need to</li> <li>vendor-specific tools to   understand kernel internal counters to identify the bottleneck.</li> </ul>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/gpu/#pinpointing-runtime-issues","title":"Pinpointing runtime issues","text":"<p>On the other side, if we suspect that it's a runtime issue, here are some useful approaches and tips:</p> <p>[correctness/performance]</p> <p>Tracy profiling is a great way to view how the application runs dynamically. 
It can help to show problematic GPU API call sequences and performance bottlenecks.</p> <ul> <li>It requires adding <code>-DIREE_ENABLE_RUNTIME_TRACING=ON</code> during CMake   configuration, or use the <code>IREE_PY_RUNTIME=tracy</code> environment variable   when invoking IREE runtime installed via Python packages.</li> </ul> <p>[correctness]</p> <p>GPU validation can sometimes give us hints:</p> <ul> <li>[] Enable validation via <code>export METAL_DEVICE_WRAPPER_TYPE=1</code>.</li> <li>[] Use <code>--vulkan_validation_layers=true</code> to <code>iree-run-module</code>, or</li> <li>[] Force enable via environment variables to the Vulkan loader:   <code>export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation</code>   (may additionally need   <code>export VK_LAYER_PATH=$VULKAN_SDK/etc/vulkan/explicit_layer.d</code> and   <code>export LD_LIBRARY_PATH=$VULKAN_SDK/lib</code> if Vulkan SDK is not installed   to a system path).</li> </ul> <p>[correctness]</p> <p>Turning on verbose output can give us more information:</p> <ul> <li>When compiling IREE runtime, add   <code>-DCMAKE_C_FLAGS=-DIREE_VM_EXECUTION_TRACING_FORCE_ENABLE=1</code> in CMake   configuration to enable VM op tracing.</li> <li>[] Use <code>--vulkan_debug_verbosity=4</code> to <code>iree-run-module</code>.</li> <li>[] Print all Vulkan APIs calls with detailed arguments:   <code>export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_api_dump</code>   (may additionally need   <code>export VK_LAYER_PATH=$VULKAN_SDK/etc/vulkan/explicit_layer.d</code> and   <code>export LD_LIBRARY_PATH=$VULKAN_SDK/lib</code> if Vulkan SDK is not installed   to a system path).</li> </ul> <p>[correctness]</p> <p>Try different \"debugging modes\" provided by HAL drivers:</p> <ul> <li>[] Switch <code>--cuda_use_streams=</code> between <code>true</code> and <code>false</code> to   <code>iree-run-module</code> to see whether the issue comes from the stream/graph   command buffer implementation.</li> <li>[] Switch 
<code>--cuda_async_allocations=false</code> to <code>iree-run-module</code> to   see if the issue comes from async allocation.</li> <li>[] Use <code>--metal_serial_command_dispatch=true</code>,   <code>--metal_command_buffer_retain_resources=true</code>, or   <code>--metal_resource_hazard_tracking=true</code> to <code>iree-run-module</code> to see   if any of the above \"fixes\" the issue.   It can help to isolate the potential problem.</li> <li>[] Use <code>--vulkan_robust_buffer_access=true</code> to <code>iree-run-module</code>   especially when seeing nondeterministic/corrupted contents in buffers and   suspecting there are buffer allocation/indexing issues.</li> </ul>","tags":["GPU","CUDA","Metal","ROCm","Vulkan"]},{"location":"developers/debugging/integration-tests/","title":"Integration test debugging","text":"<p>This document includes tips for triaging integration test correctness issues. Feel free to reach out to @hanhanW or ask questions on Discord for more help.</p>"},{"location":"developers/debugging/integration-tests/#general-tips","title":"General tips","text":""},{"location":"developers/debugging/integration-tests/#narrow-down-reproducers","title":"Narrow down reproducers","text":"<ul> <li>Models themselves can be large, and IREE breaks models into dispatches/kernels and then launches those individually. Program outputs could diverge starting from any individual launch. To get a smaller reproducer, you can use --iree-flow-trace-dispatch-tensors.</li> <li>You can compare the logs between builds/backends to get an idea about which dispatch results in wrong outputs. The dumped inputs can be reused in a flagfile.</li> </ul> <p>Once a suspicious dispatch is identified, we can create a test case based on the dispatch function. The dispatch function can be derived after the <code>OutlineDispatchRegions</code> pass. The function signatures have to be modified manually. 
You'll have to put <code>flow.dispatch.tensor.load</code> variables to function arguments, and replace <code>flow.dispatch.tensor.store</code> with <code>return</code> op.</p> <p>Note: This only works when dispatch formation logics are identical between runs.</p>"},{"location":"developers/debugging/integration-tests/#iree-experimental-repository-tests","title":"iree-experimental repository tests","text":"<p>Follow README to run the model. The MLIR files will be generated. You'll find the saved file from log. E.g.,</p> <pre><code>[ RUN      ] MobilenetV2Int8Test.test_compile_tflite\nI0401 17:27:04.084272 140182373025024 test_util.py:119] Setting up for IREE\nI0401 17:27:04.085064 140182373025024 binaries.py:218] Invoke IREE Pipeline:\n  /tmp/iree-experimental/iree-experimental.venv/lib/python3.9/site-packages/iree/tools/tflite/iree-import-tflite\n    /tmp/iree-experimental/tflitehub/tmp/mobilenet_v2_int8_test.py/model.tflite\n    --mlir-print-debuginfo\n    --save-temp-tfl-input=/tmp/iree-experimental/tflitehub/tmp/mobilenet_v2_int8_test.py/tflite.mlir\n    --save-temp-iree-input=/tmp/iree-experimental/tflitehub/tmp/mobilenet_v2_int8_test.py/tosa.mlir\n</code></pre> <p>Unfortunately, the artifacts are not dumped in the runs. There is an issue for tracking this. A workaround can be found in the issue.</p>"},{"location":"developers/debugging/integration-tests/#tensorflow-integration-tests","title":"TensorFlow integration tests","text":"<p>These are steps to reproduce/address failures in TF/TFLite integration tests. 
These instructions are most stable on Linux, though they may work with a few tweaks on Windows and macOS.</p> <p>All steps here assume starting from the IREE root directory.</p> <ol> <li> <p>First create a Python virtual environment to install packages into:</p> <pre><code>python -m venv iree-tf.venv\nsource iree-tf.venv/bin/activate\n\n# Install test requirements\npython -m pip install -r ./integrations/tensorflow/test/requirements.txt\n</code></pre> </li> <li> <p>Install IREE's tools and Python bindings or build them from source</p> <p>Install distributed packages</p> <pre><code># Install packages from nightly releases\n# This should work for most cases, as the importers change infrequently\npython -m pip install \\\n  iree-compiler iree-runtime iree-tools-tf iree-tools-tflite \\\n  --find-links https://iree.dev/pip-release-links.html\n</code></pre> <p>OR build from source</p> <pre><code># Build Python bindings from source\ncmake -G Ninja -B ../iree-build/ -DIREE_BUILD_PYTHON_BINDINGS=ON .\ncmake --build ../iree-build/\n\n# Add IREE built-from-source Python packages to PYTHONPATH\nsource .env\n\n# Install IREE TF/TFLite Python packages\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n</code></pre> </li> <li> <p>Run the python test command line</p> <p>The command can be obtained from the run file. 
For example, if <code>iree_tfl_tests/llvmcpu_posenet_i8.run</code> failed,</p> <pre><code>cd integrations/tensorflow/test/\ncat iree_tfl_tests/llvmcpu_posenet_i8.run\n\n# REQUIRES: llvmcpu\n# RUN: %PYTHON -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=%t\n\ncd python/\npython -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=/tmp/posenet_i8_failure\n</code></pre> <p>Note that the command can only be run under the <code>integrations/tensorflow/test/python</code> directory.</p> </li> <li> <p>Extract intermediate files and use with native tools</p> <p>The test will create an <code>iree_input.mlir</code> in the temp directory specified. Those can then be fed into <code>iree-compile</code> (built locally to reproduce the error)</p> <pre><code>iree-compile \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-input-type=stablehlo \\\n  iree_input.mlir\n</code></pre> </li> </ol>"},{"location":"developers/debugging/releases/","title":"Release debugging playbook","text":""},{"location":"developers/debugging/releases/#tools-and-locations","title":"Tools and Locations","text":"<ul> <li><code>.github/workflows/build_package.yml</code>: Release packaging jobs</li> <li><code>build_tools/github_actions/build_dist.py</code>: Main script to build various   release packages (for all platforms). We usually use this when reproing to   approximate exactly what the CI does. Assumes a subdirectory of <code>c</code>   and writes builds to <code>iree-build</code> and <code>iree-install</code> as a peer of it. To use   locally, just symlink your source dir as <code>c</code> in an empty   directory (versus checking out).</li> </ul>"},{"location":"developers/debugging/releases/#mapping-releases-back-to-git-commits","title":"Mapping releases back to git commits","text":"<p>The source IREE commit SHA is embedded into pip releases in a few places. 
Starting in a python venv, you can find the IREE commit from both the shell:</p> <pre><code>\"$(find . -name 'iree-compile' -executable)\" --version\nIREE (https://iree.dev):\n  IREE compiler version 20231016.553 @ f1cb2692a086738d7f16274b9b3af6d2c15ef133\n  LLVM version 18.0.0git\n  Optimized build\n</code></pre> <p>and the Python API:</p> <pre><code>python -c \"import iree.compiler.version as v; print(v.REVISIONS['IREE'])\"\nf1cb2692a086738d7f16274b9b3af6d2c15ef133\n</code></pre>"},{"location":"developers/debugging/releases/#manylinux-releases","title":"Manylinux releases","text":"<p>The Linux releases are done in a manylinux2014 docker container. At the time of this writing, it has gcc 9.3.1 and Python versions 3.5 - 3.9 under <code>/opt/python</code>. Note that this docker image approximates a 2014 era RHEL distro, patched with backported (newer) dev packages. It builds with gcc and BFD linker unless if you arrange otherwise. <code>yum</code> can be used to get some packages.</p> <p>Get a docker shell (see exact docker image in build_package.yml workflow):</p> <pre><code>docker run --rm -it -v $(pwd):/work/c stellaraccident/manylinux2014_x86_64-bazel-4.2.2:latest /bin/bash\n</code></pre> <p>Remember that docker runs as root unless if you take steps otherwise. Don't touch write files in the <code>/work/c</code> directory to avoid scattering root owned files on your workstation.</p> <p>The default system Python is 2.x, so you must select one of the more modern ones:</p> <pre><code>export PATH=/opt/python/cp39-cp39/bin:$PATH\n</code></pre> <p>Build core installation:</p> <pre><code># (from within docker)\ncd /work\npython ./c/build_tools/github_actions/build_dist.py main-dist\n\n# Also supports:\n#   main-dist\n#   py-runtime-pkg\n#   py-xla-compiler-tools-pkg\n#   py-tflite-compiler-tools-pkg\n#   py-tf-compiler-tools-pkg\n</code></pre> <p>You can <code>git bisect</code> on the host and keep running the above in the docker container. 
Note that every time you run <code>build_dist.py</code>, it deletes the cmake cache but otherwise leaves the build directory (so it pays the configure cost but is otherwise incremental). You can just <code>cd iree-build</code> and run <code>ninja</code> for faster iteration (after the first build or if changing cmake flags). Example:</p> <p>Extended debugging in the manylinux container:</p> <pre><code>cd /work/iree-build\n# If doing extended debugging in the container, these may make you happier.\nyum install ccache devtoolset-9-libasan-devel gdb\n\n# Get an LLVM symbolizer.\nyum install llvm9.0\nln -s /usr/bin/llvm-symbolizer-9.0 /usr/bin/llvm-symbolizer\n\n# You can manipulate cmake flags. These may get you a better debug experience.\ncmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DIREE_ENABLE_ASAN=ON -DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=gold -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .\n\nninja\n\n# Or you may need this if buggy LLVM tools (like mlir-tblgen) are leaking :(\nASAN_OPTIONS=\"detect_leaks=0\" ninja\n</code></pre> <p>Other tips:</p> <ul> <li>If debugging the runtime, you may have a better time just building the   Release mode <code>main-dist</code> package above once, which will drop binaries in the   <code>iree-install</code> directory. Then build the <code>py-runtime-pkg</code> or equiv and   iterate further in the build directory. Ditto for TF/XLA/etc.</li> </ul>"},{"location":"developers/debugging/releases/#testing-releases-on-your-fork","title":"Testing releases on your fork","text":"<p>To avoid interrupting the regular releases published on the IREE github, you can test any changes to the release process on your own fork.  Some setup is required before these github actions will work on your fork and development branch.</p> <p>You can run <code>schedule_candidate_release.yml</code> with a workflow dispatch from the actions tab. 
If you want to test using a commit other than the latest green on your <code>main</code> branch, modify the section that identifies the latest green commit to search from another commit or just hardcode one.</p> <p>To speed up <code>build_package.yml</code>, you may want to comment out some of the builds here. The <code>py-pure-pkgs</code> build takes only ~2 minutes and the <code>py-runtime-pkg</code> build takes ~5, while the others can take several hours.</p> <p>From your development branch, you can manually run the Schedule Snapshot Release action, which invokes the Build Release Packages action, which finally invokes the Validate and Publish Release action.  If you already have a draft release and know the release id, package version, and run ID from a previous Build Release Packages run, you can also manually run just the Validate and Publish Release action.</p>"},{"location":"developers/debugging/sanitizers/","title":"Sanitizers (ASan/MSan/TSan)","text":"<p>AddressSanitizer, MemorySanitizer and ThreadSanitizer are tools provided by <code>clang</code> to detect certain classes of errors in C/C++ programs. They consist of compiler instrumentation (so your program's executable code is modified) and runtime libraries (so e.g. the <code>malloc</code> function may get replaced).</p> <p>They are abbreviated as \"ASan\", \"MSan\" and \"TSan\" respectively.</p> <p>They all incur large overhead, so only enable them while debugging.</p> Tool Detects Helps debug what? Slowdown Memory overhead Android support ASan Out-of-bounds accesses, use-after-free, use-after-return, memory leaks Crashes, non-deterministic results, memory leaks 2x 3x Yes MSan Uninitialized memory reads Non-deterministic results 3x ? Yes TSan Data races Many bugs in multi-thread code 5x-15x 5x-10x No <p>Note</p> <p>See this documentation on leak detection. 
It is only enabled by default on some platforms.</p>"},{"location":"developers/debugging/sanitizers/#support-status-and-how-to-enable-each-sanitizer","title":"Support status and how to enable each sanitizer","text":""},{"location":"developers/debugging/sanitizers/#asan-addresssanitizer","title":"ASan (AddressSanitizer)","text":"<p>To enable ASan:</p> <pre><code>cmake -DIREE_ENABLE_ASAN=ON ...\n</code></pre> <p>Several <code>_asan</code> tests like <code>iree/tests/e2e/stablehlo_ops/check_llvm-cpu_local-task_asan_abs.mlir</code> are also defined when using this configuration. These tests include AddressSanitizer in compiled CPU code as well by using these <code>iree-compile</code> flags:</p> <pre><code>--iree-llvmcpu-link-embedded=false\n--iree-llvmcpu-sanitize=address\n</code></pre>"},{"location":"developers/debugging/sanitizers/#linking-to-the-dynamic-asan-runtime","title":"Linking to the dynamic ASan runtime","text":"<p>You may want to use ASan when using the python bindings. One way to achieve this is to build Python (or whatever executable that is going to use IREE as a shared library) with Asan. Another option is to link to the ASan runtime dynamically instead of linking it statically into an executable.</p> <p>Using clang-12 (other versions should also work) as a example, configure IREE with something like:</p> <pre><code>cmake \\\n  -DIREE_ENABLE_ASAN=ON \\\n  -DCMAKE_EXE_LINKER_FLAGS=-shared-libasan \\\n  -DCMAKE_SHARED_LINKER_FLAGS=-shared-libasan \\\n  -DCMAKE_C_COMPILER=clang-12 \\\n  -DCMAKE_CXX_COMPILER=clang++-12 \\\n  ...\n</code></pre> <p>Then when running things the ASan runtime will have to be preloaded.</p> <pre><code>LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.0/lib/linux/libclang_rt.asan-x86_64.so \\\nASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer \\\n  python ...\n</code></pre> <p>On Ubuntu the corresponding ASan runtime is provided by a package like <code>libclang-common-12-dev</code> depending on your Clang version. 
E.g.</p> <pre><code>sudo apt install libclang-common-12-dev llvm-12 clang-12\n</code></pre> <p>Note that during building would also need to preload the ASan runtime, since the build executes its own binaries that are linked against the runtime.</p> <pre><code>LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.0/lib/linux/libclang_rt.asan-x86_64.so \\\nASAN_OPTIONS=detect_leaks=0 \\\nASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer \\\n  cmake --build ...\n</code></pre> <p>Tip</p> <p>If you want to run the IREE CUDA runtime driver it is likely you would need.</p> <pre><code>ASAN_OPTIONS=\"protect_shadow_gap=0\"\n</code></pre> <p>Like this</p> <pre><code>LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.0/lib/linux/libclang_rt.asan-x86_64.so \\\nASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer \\\nASAN_OPTIONS=\"protect_shadow_gap=0\" \\\n  python ...\n</code></pre>"},{"location":"developers/debugging/sanitizers/#tsan-threadsanitizer","title":"TSan (ThreadSanitizer)","text":""},{"location":"developers/debugging/sanitizers/#c-standard-library-with-tsan-support","title":"C++ Standard Library with TSan support","text":"<p>For best results to avoid false positives/negatives TSan needs all userspace code to be compiled with Tsan. This includes <code>libstdc++</code> or <code>libc++</code>. libstdc++ is usually the default C++ runtime on Linux.</p> <p>Building GCC's 12 libstdc++ on Ubuntu 22.04 with Clang has build errors. It seems that GCC and Clang shared their TSan implementation. They may be interoperable, but to avoid problems we should build everything with GCC. 
This means using GCC both as a compiler and linker.</p>"},{"location":"developers/debugging/sanitizers/#build-libstdc-with-tsan-support","title":"Build libstdc++ with TSan support","text":"<p>Get GCC 12.3 source code.</p> <pre><code>git clone --depth 1 --branch releases/gcc-12.3.0 \\\n  https://github.com/gcc-mirror/gcc.git\n</code></pre> <pre><code>SRC_DIR=$PWD/gcc\nBIN_DIR=$PWD/gcc/build\n</code></pre> <p>Building all dependencies of libstdc++ with TSan has errors during linking of <code>libgcc</code>. libgcc is a dependency of libstdc++. It is desirable to build everything with TSan, but it seems this excludes libgcc, as the TSan runtime <code>libtsan</code> has it as a dependency. We build it without TSan. We do that to make libstdc++'s configuration find <code>gthr-default.h</code>, which is generated during building of libgcc. If not found C++ threads will silently have missing symbols.</p> <pre><code>LIBGCC_BIN_DIR=$BIN_DIR/libgcc\nmkdir -p $LIBGCC_BIN_DIR\ncd $LIBGCC_BIN_DIR\n\n$SRC_DIR/configure \\\n  CC=gcc-12 \\\n  CXX=g++-12 \\\n  --disable-multilib \\\n  --disable-bootstrap \\\n  --enable-languages=c,c++\n\nmake -j$(nproc) --keep-going all-target-libgcc\n</code></pre> <p>Now build libstdc++.</p> <pre><code>LIBSTDCXX_BIN_DIR=$BIN_DIR/libstdc++\nmkdir -p $LIBSTDCXX_BIN_DIR\nLIBSTDCXX_INSTALL_DIR=$BIN_DIR/install/libstdc++\nmkdir -p $LIBSTDCXX_INSTALL_DIR\n\nGTHREAD_INCLUDE_DIR=$LIBGCC_BIN_DIR/x86_64-pc-linux-gnu/libgcc\nCXX_AND_C_FLAGS=\"-I$GTHREAD_INCLUDE_DIR -g -fno-omit-frame-pointer -fsanitize=thread\"\n\ncd $LIBSTDCXX_BIN_DIR\n$SRC_DIR/libstdc++-v3/configure \\\n  CC=gcc-12 \\\n  CXX=g++-12 \\\n  CFLAGS=\"$CXX_AND_C_FLAGS\" \\\n  CXXFLAGS=\"$CXX_AND_C_FLAGS\" \\\n  LDFLAGS=\"-fsanitize=thread\" \\\n  --prefix=$LIBSTDCXX_INSTALL_DIR \\\n  --disable-multilib \\\n  --disable-libstdcxx-pch \\\n  --enable-libstdcxx-threads=yes \\\n  --with-default-libstdcxx-abi=new\n\nmake -j$(nproc)\nmake install\n</code></pre> <p>When running programs you would need to 
use the sanitized version of libstdc++.</p> <pre><code>LD_LIBRARY_PATH=\"$LIBSTDCXX_INSTALL_DIR/lib\" \\\n  my-program ...\n</code></pre>"},{"location":"developers/debugging/sanitizers/#iree-with-tsan-support","title":"IREE with TSan support","text":"<p>To enable TSan:</p> <pre><code>cmake -DIREE_ENABLE_TSAN=ON ...\n</code></pre> <p>Several <code>_tsan</code> tests like <code>iree/tests/e2e/stablehlo_ops/check_llvm-cpu_local-task_tsan_abs.mlir</code> are also defined when using this configuration. These tests include ThreadSanitizer in compiled CPU code as well by using these <code>iree-compile</code> flags:</p> <pre><code>--iree-llvmcpu-link-embedded=false\n--iree-llvmcpu-sanitize=thread\n</code></pre> <p>Note that an IREE runtime built with TSan cannot load an IREE compiled LLVM/CPU module unless those flags are used, so other tests are excluded using the <code>notsan</code> label.</p>"},{"location":"developers/debugging/sanitizers/#msan-memorysanitizer","title":"MSan (MemorySanitizer)","text":"<p>In theory that should be a simple matter of</p> <pre><code>-DIREE_ENABLE_MSAN=ON\n</code></pre> <p>However, that requires making and using a custom build of libc++ with MSan as explained in this documentation.</p> <p>As of April 2022, all of IREE's tests succeeded with MSan on Linux/x86-64, provided that the <code>vulkan</code> driver was disabled (due to lack of MSan instrumentation in the NVIDIA Vulkan driver).</p>"},{"location":"developers/debugging/sanitizers/#ubsan-undefinedbehaviorsanitizer","title":"UBSan (UndefinedBehaviorSanitizer)","text":"<p>Enabling UBSan in the IREE build is a simple matter of setting the <code>IREE_ENABLE_UBSAN</code> CMake option:</p> <pre><code>cmake -DIREE_ENABLE_UBSAN=ON ...\n</code></pre> <p>Note that both ASan and UBSan can be enabled in the same build.</p>"},{"location":"developers/debugging/sanitizers/#symbolizing-the-reports","title":"Symbolizing the 
reports","text":""},{"location":"developers/debugging/sanitizers/#desktop-platforms","title":"Desktop platforms","text":"<p>On desktop platforms, getting nicely symbolized reports is covered in this documentation. The gist of it is to make sure that <code>llvm-symbolizer</code> is in your <code>PATH</code>, or make the <code>ASAN_SYMBOLIZER_PATH</code> environment variable point to it.</p>"},{"location":"developers/debugging/sanitizers/#android","title":"Android","text":"<p>On Android it's more complicated due to this Android NDK issue. Fortunately, we have a script to perform the symbolization. Copy the raw output from the sanitizer and feed it into the <code>stdin</code> of the <code>build_tools/scripts/android_symbolize.sh</code> script, with the <code>ANDROID_NDK</code> environment variable pointing to the NDK root directory, like this:</p> <pre><code>ANDROID_NDK=~/android-ndk-r21d ./build_tools/scripts/android_symbolize.sh &lt; /tmp/asan.txt\n</code></pre> <p>Where <code>/tmp/asan.txt</code> is where you've pasted the raw sanitizer report.</p> <p>Tip</p> <p>This script will happily just echo any line that isn't a stack frame. That means you can feed it the whole <code>ASan</code> report at once, and it will output a symbolized version of it. DO NOT run it on a single stack at a time! That is unlike the symbolizer tool that's being added in NDK r22, and one of the reasons why we prefer to keep our own script. For more details see this comment.</p>"},{"location":"developers/design-docs/cuda-hal-driver/","title":"CUDA HAL driver","text":"<p>This document lists technical details regarding the CUDA implementation of IREE's Hardware Abstraction Layer, called a CUDA HAL driver.</p> <p>IREE provides a Hardware Abstraction Layer (HAL) as a common interface to different compute accelerators. 
IREE HAL's design draws inspiration from modern GPU architecture and APIs; so implementing a HAL driver using CUDA is mostly straightforward; though there are places we need emulation given no direct mapping concepts or mechanisms.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#overall-design-choices","title":"Overall design choices","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#cuda-driver-vs-runtime-api","title":"CUDA driver vs runtime API","text":"<p>IREE HAL's design draws inspiration from modern GPU APIs--it provides explicit control of low-level GPU objects. The compiler is expected to plan the object lifetime and schedule workload and synchronization in an optimized way; IREE HAL implementation and the underlying GPU driver stack is expected to be a thin layer without much smarts and magic.</p> <p>Therefore when implementing the IREE HAL using CUDA, we use the driver API instead of the runtime API. At runtime the HAL CUDA driver will load the <code>libcuda.so</code>/<code>nvcuda.dll</code> library dynamically and query a subset of the CUDA driver API used in HAL via the <code>cuGetProcAddress()</code> API.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#gpu-objects","title":"GPU Objects","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#driver","title":"Driver","text":"<p>There is no direct CUDA construct that map to the IREE HAL <code>iree_hal_driver_t</code> abstraction. We use it to hold the dynamic symbols loaded for all devices, and device enumeration and creation.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#device","title":"Device","text":"<p><code>iree_hal_cuda_device_t</code> implements <code>iree_hal_device_t</code> to provide the interface to CUDA GPU device by wrapping a <code>CUdevice</code>. 
For each device, right now we create two <code>CUstream</code>s--one for issuing commands for memory allocation and kernel launches as instructed by the program; the other for issuing host callback functions after dispatched command buffers complete. See synchronization section regarding the details.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#async-allocation","title":"Async allocation","text":"<p>The CUDA HAL driver supports async allocation (<code>iree_hal_device_queue_alloca()</code> and <code>iree_hal_device_queue_dealloca()</code>) via CUDA stream ordered memory allocation.</p> <p>The <code>async_allocations</code> in the <code>iree_hal_cuda_device_params_t</code> struct allows to enable this feature.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#command-buffer","title":"Command buffer","text":"<p><code>iree_hal_command_buffer_t</code> is a recording of commands to issue to the GPU; when the command buffer is submitted to the device it's then actually executed on the GPU asynchronously.</p> <p>Two implementations of <code>iree_hal_command_buffer_t</code> exist in the CUDA HAL driver--one backed by <code>CUgraph</code> and the other backed by <code>CUstream</code>.</p> <p><code>CUgraph</code> conceptually matches <code>iree_hal_command_buffer_t</code> better given it's a recording of commands to issue to the GPU. Also using the <code>CUgraph</code> API allows to easily encode fine grain dependencies between dispatch without having to create multiple streams. Therefore, the <code>CUgraph</code>-backed implementation is a more natural one. Though note that <code>CUgraph</code> API is meant to be used for recording once and replaying multiple times and there may be a performance penalty to using <code>CUgraph</code> API for one-shot command buffer.</p> <p>The <code>CUstream</code>-backed implementation just issues commands directly to a <code>CUstream</code> when recording. 
Commands issued to <code>CUstream</code> can be immediately sent to the GPU for execution; there is no recording and replaying separation. In order to match the recording semantics of <code>iree_hal_command_buffer_t</code>, to use the <code>CUstream</code>-backed command buffer, we need to first record the command buffer into an in-memory <code>iree_hal_deferred_command_buffer_t</code>, and then when applying the command buffer, we create a new <code>CUstream</code>-backed implementation.</p> <p>The <code>command_buffer_mode</code> in the <code>iree_hal_cuda_device_params_t</code> struct allows to select which implementation to use.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#allocator","title":"Allocator","text":"<p>The allocator will forward allocation requests to <code>cuMemHostAlloc()</code> for host local memory, <code>cuMemAlloc()</code> for device local and host invisible memory, and <code>cuMemAllocManaged()</code> for device local and host visible memory.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#buffer","title":"Buffer","text":"<p>CUDA buffers are represented either as a host pointer or a device pointer of type <code>CUdeviceptr</code>.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#executable","title":"Executable","text":"<p><code>iree_hal_executable_t</code> maps naturally to <code>CUmodule</code>.</p> <p>The compiler generates a FlatBuffer containing a PTX image as well as a list of entry point functions and their associated metadata (names, workgroup size, dynamic shared memory size, etc.). 
At runtime, the CUDA HAL driver loads the PTX image and creates <code>CUfunction</code>s out of it for various entry points.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#synchronization","title":"Synchronization","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#event","title":"Event","text":"<p><code>iree_hal_event_t</code> right now is not used in the compiler so it's not yet implemented in the CUDA HAL driver.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#semaphore","title":"Semaphore","text":"<p>The IREE HAL uses semaphores to synchronize work between host CPU threads and device GPU streams. It's a unified primitive that covers all directions--host to host, host to device, device to host, and device to device, and allows flexible signal and wait ordering--signal before wait, or wait before signal. There is no limit on the number of waits of the same value either.</p> <p>The core state of a HAL semaphore consists of a monotonically increasing 64-bit integer value, which forms a timeline--signaling the semaphore to a larger value advances the timeline and unblocks work waiting on some earlier values. The semantics closely mirrors Vulkan timeline semaphore.</p> <p>In CUDA, there are no directly equivalent primitives providing all the capabilities needed by the HAL semaphore abstraction:</p> <ul> <li>Stream memory operations provide <code>cuStreamWriteValue64()</code> and   <code>cuStreamWaitValue64()</code>, which can implement HAL semaphore 64-bit integer value   signal and wait. Though these operations require device pointers and cannot   accept pointers to managed memory buffers, meaning no support for the host.   Additionally, per the spec, \"synchronization ordering established through   these APIs is not visible to CUDA. 
CUDA tasks that are (even indirectly)   ordered by these APIs should also have that order expressed with   CUDA-visible dependencies such as events.\" So it's not suitable for   integration with other CUDA components.</li> <li>For external resource interoperability, we have APIs   like <code>cuSignalExternalSemaphoresAsync()</code> and <code>cuWaitExternalSemaphoresAsync()</code>,   which can directly map to Vulkan timeline semaphores. Though these APIs are   meant to handle external resources--there is no way to create   <code>CUexternalSemaphore</code> objects directly other than <code>cuImportExternalSemaphore()</code>.</li> </ul> <p>Therefore, to implement the support, we need to leverage multiple native CPU or CUDA primitives under the hood.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#cuevent-capabilities","title":"<code>CUevent</code> capabilities","text":"<p>The main synchronization mechanism is CUDA event--<code>CUevent</code>. As a functionality and integration baseline, we use <code>CUevent</code> to implement the IREE HAL semaphore abstraction.</p> <p><code>CUevent</code> natively supports the following capabilities:</p> <ul> <li>State: binary; either unsignaled or signaled. There can exist multiple   waits (e.g., via <code>cuEventSynchronize()</code> or <code>cuGraphAddEventWaitNode()</code>) for   the same <code>CUevent</code> signal (e.g., via <code>cuEventRecord()</code> or   <code>cuGraphAddEventRecordNode()</code>).</li> <li>Ordering: must be signal before wait. Waiting before signal would mean   waiting on an empty set of work, or previously recorded work.</li> <li>Direction: device to device, device to host.</li> </ul> <p>We need to fill the remaining capability gaps. Before going into details, the overall approach would be to:</p> <ul> <li>State: we need a 64-bit integer value timeline. 
Given the binary state of   a <code>CUevent</code>, each <code>CUevent</code> would just be a \"timepoint\" on the timeline.</li> <li>Ordering: we need to defer releasing the workload to the GPU until the   semaphore waits are reached on the host, or we can have some device   <code>CUevent</code> to wait on.</li> <li>Direction: host to host and host to device are missing; we can support that   with host synchronization mechanisms.</li> </ul>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#signal-to-wait-analysis","title":"Signal to wait analysis","text":"<p>Concretely, for a given HAL semaphore, looking at the four directions:</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#cpu-signal","title":"CPU signal","text":"<p>A CPU thread signals the semaphore timeline to a new value.</p> <p>If there are CPU waits, it is purely on the CPU side. We just need to use common CPU notification mechanisms. In IREE we have <code>iree_event_t</code> wrapping various low-level OS primitives for it. So we can just use that to represent a wait timepoint. We need to keep track of all CPU wait timepoints in the timeline. After a new signaled value, go through the timeline and notify all those waiting on earlier values.</p> <p>If there are GPU waits, given that there is no way we can signal a <code>CUevent</code> on the CPU, one way to handle this is to cache and defer the submission batches by ourselves until the CPU signals past the desired value. To support this, we would need to implement a deferred/pending actions queue.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#gpu-signal","title":"GPU signal","text":"<p>GPU signals can only be through a <code>CUevent</code> object, which has a binary state. We need to advance the timeline too. One way is to use <code>cuLaunchHostFunc()</code> to advance from the CPU side with <code>iree_hal_semaphore_list_signal()</code>. 
This additionally would mean we can reuse the logic from CPU signaling to unblock CPU waits.</p> <p>After advancing the timeline from the CPU side with <code>cuLaunchHostFunc()</code>, we can release more workload from the deferred/pending actions queue to the GPU. Though, per the documentation of <code>cuLaunchHostFunc()</code>, \"the host function must not make any CUDA API calls.\" So we cannot do that directly inside <code>cuLaunchHostFunc()</code>; we need to notify another separate thread to call CUDA APIs to push more work to the GPU. So the deferred/pending action queue should have an associated thread.</p> <p>For GPU waits, we can also leverage the same logic--using CPU signaling to unblock deferred GPU queue actions. Though this is not performant, given that the CPU is involved for GPU internal synchronization. We want to use <code>CUevent</code> instead:</p> <ul> <li>We keep track of all GPU signals in the timeline. Once we see a GPU wait   request, try to scan the timeline to find a GPU signal that advances the   timeline past the desired value, and use that for waiting instead. (This   actually applies to CPU waits too, and it's an optimization over pure   CPU side <code>iree_event_t</code> polling.)</li> <li>We may not see GPU signal before seeing GPU wait requests, then we can also   keep track of all GPU waits in the timeline. Later once we see either a CPU   signal or GPU signal advancing past the waited value, we can handle them   accordingly--submitting immediately or associating the <code>CUevent</code>.   This would also guarantee the requirement of <code>CUevent</code>--recording should   happen before waiting.</li> <li>We can use the same <code>CUevent</code> to unblock multiple GPU waits. That's allowed,   though it would mean we need to be careful regarding <code>CUevent</code> lifetime   management. 
Here we can use reference counting to see how many timepoints   are using it and automatically return to a pool once done.</li> </ul> <p>Another problem is that per the <code>cuLaunchHostFunc()</code> doc, \"the function will be called after currently enqueued work and will block work added after it.\" We don't want the blocking behavior involving host. So we can use a dedicated <code>CUstream</code> for launching the host function, waiting on the <code>CUevent</code> from the original stream too. We can also handle resource deallocation together there.</p>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-hal-driver/#data-structures","title":"Data structures","text":"<p>To summarize, we need the following data structures to implement HAL semaphore:</p> <ul> <li><code>iree_event_t</code>: CPU notification mechanism wrapping low-level OS primitives.   Used by host wait timepoints.</li> <li><code>iree_event_pool_t</code>: a pool for CPU <code>iree_event_t</code> objects to recycle.</li> <li><code>iree_hal_cuda_event_t</code>: GPU notification mechanism wrapping a <code>CUevent</code> and   a reference count. Used by device signal and wait timepoints. Associates with   a <code>iree_hal_cuda_event_pool_t</code> pool--returns to the pool directly once the   reference count goes to 0.</li> <li><code>iree_hal_cuda_event_pool_t</code>: a pool for GPU <code>iree_hal_cuda_event_t</code> objects   to recycle.</li> <li><code>iree_hal_cuda_timepoint_t</code>: an object that wraps a CPU <code>iree_event_t</code> or   GPU <code>iree_hal_cuda_event_t</code> to represent wait/signal of a timepoint on a   timeline.</li> <li><code>iree_hal_cuda_timepoint_pool_t</code>: a pool for <code>iree_hal_cuda_timepoint_t</code>   objects to recycle. 
This pool builds upon the CPU and GPU event pool--it   acquires CPU/GPU event objects there.</li> <li><code>iree_hal_cuda_timeline_semaphore_t</code>: contains a list of CPU wait and GPU   wait/signal timepoints.</li> <li><code>iree_hal_cuda_queue_action_t</code>: a pending queue action (kernel launch or   stream-ordered allocation).</li> <li><code>iree_hal_cuda_pending_queue_actions_t</code>: a data structure to manage pending   queue actions. It provides APIs to enqueue actions, and advance the queue on   demand--queue actions are released to the GPU when all their wait semaphores   are signaled past the desired value, or we can have a <code>CUevent</code> object already   recorded to some <code>CUstream</code> to wait on.</li> </ul>","tags":["GPU","CUDA"]},{"location":"developers/design-docs/design-roadmap/","title":"Design roadmap","text":"<p>A not-so-concise walkthrough of various IREE features that are in the design process and planned for future versions. A lot of the questions around how the IREE IR is designed and why certain components exist (such as the VM) hopefully become much clearer when seeing where we want to go with the infrastructure we are building (as opposed to where we currently are with our MVP slice). 
This document is not meant to encompass the entire design of any individual feature and if there's interest please say hi on the iree-discuss mailing list.</p> <ul> <li>Design roadmap<ul> <li>Input Dialects<ul> <li>Quantization</li> </ul> </li> <li>flow: Data- and Execution-Flow Modeling<ul> <li>Avoiding Readbacks with flow.stream</li> <li>Threading flow.stream through the CFG</li> <li>Predication of flow.dispatch</li> <li>Deduping flow.executables</li> <li>Rematerializing CSE'd Expressions</li> <li>Device Placement</li> </ul> </li> <li>hal: Hardware Abstraction Layer and Multi-Architecture Executables<ul> <li>Allow Targets to Specify hal.interfaces</li> <li>Target-specific Scheduling Specialization</li> <li>Buffer Usage Tracking</li> <li>Batched Executable Caching and Precompilation</li> <li>Target-aware Executable Compression</li> <li>Target-aware Constant Compression</li> <li>Command Buffer Stateful Deduplication</li> <li>Resource Timeline</li> <li>Transient Tensor Ringbuffer</li> <li>Timeline Semaphores on the Module ABI</li> <li>GPU-like CPU Scheduling</li> </ul> </li> <li>vm: Lightweight Virtual Machine<ul> <li>Coroutines for Batching and Cooperative Scheduling<ul> <li>Cellular Batching</li> </ul> </li> <li>Lowering to LLVM IR</li> <li>Improved Type Support</li> <li>Indirect Command Buffer/On-Accelerator Execution</li> </ul> </li> </ul> </li> </ul>"},{"location":"developers/design-docs/design-roadmap/#input-dialects","title":"Input Dialects","text":""},{"location":"developers/design-docs/design-roadmap/#quantization","title":"Quantization","text":"<p>It's assumed that any work related to quantization/compression has happened prior to lowering into IREE dialects. Our plan is to use the proposed Quantization Transforms to achieve both training and inference-time quantization of types in a way that preserves maximum accuracy. 
IREE will support running with original unquantized floats in all cases, allowing for a smooth on-ramp to quantization and the gains in performance and reduction in model size that come from it.</p> <p>As future work IREE would like to move beyond these transformation-directed approaches to quantization and interface directly to frontends which have a defined enough type system to represent accurate quantized (and otherwise compressed) computations directly, not relying exclusively on compiler-side type inference transforms.</p>"},{"location":"developers/design-docs/design-roadmap/#flow-data-and-execution-flow-modeling","title":"<code>flow</code>: Data- and Execution-Flow Modeling","text":"<p>The <code>flow</code> dialect is designed to allow us to extract as much concurrency as possible from a program and partition IR into the scheduling and execution domains. Today we have the IR structure and transformation flow in place but have not yet got to the most interesting things such an infrastructure enables. A majority of the largest performance, latency, and memory usage improvements IREE can offer are determined first here and all following lowerings benefit. The fastest code is the code you don't execute and the smallest allocation is the allocation you don't make ;)</p>"},{"location":"developers/design-docs/design-roadmap/#avoiding-readbacks-with-flowstream","title":"Avoiding Readbacks with <code>flow.stream</code>","text":"<p>A majority of the readbacks we have today (manifested as <code>flow.tensor.load.*</code> ops) will be removed when we have an HLO tensor-&gt;primitive conversion. There will still be cases when readbacks are required for correctness but they usually fall into a small set of usage patterns. For those that don't this is one place where IREE will warn about performance issues, allowing programs that perform suboptimally but encouraging authors to adjust their input model to enable better behavior. 
The IREE VM also has specific support for hiding readback latency in an efficient way via coroutines.</p> <p>The most common case we are currently seeing in the IR is that of dynamic copies where the offsets are dependent on the result of previous computations. Source models may have top-k + gather operations, for example. These appear as a <code>flow.stream</code>, a <code>flow.tensor.load</code>, and then another <code>flow.stream</code> that uses the loaded value for a <code>flow.tensor.update</code> (or other operation):</p> <pre><code>%index_tensor = flow.ex.stream.fragment(...) -&gt; tensor&lt;i32&gt; { ... }\n%index = flow.tensor.load %index_tensor : tensor&lt;i32&gt;\n%result = flow.ex.stream.fragment(%arg0 = %index : i32, ...) -&gt; ... {\n  %0 = flow.dispatch ...\n  %1 = flow.tensor.update %0, %arg2[%index] : tensor&lt;10xf32&gt; -&gt; tensor&lt;1x10xf32&gt;\n  ...\n}\n</code></pre> <p>Today the <code>flow.tensor.update</code> turns into HAL command buffer transfer operations that must have their offsets known at recording time. This is a limitation of <code>vkCmdCopyBuffer</code> but not a fundamental limitation of any hardware. In fact several drivers implement copies as small built-in shader programs meaning that we could perform the same expansion here with the right primitives. This would allow, in the above example, both the index to be computed and the tensor to be updated within the same stream to entirely remove the host round-trip.</p>"},{"location":"developers/design-docs/design-roadmap/#threading-flowstream-through-the-cfg","title":"Threading <code>flow.stream</code> through the CFG","text":"<p>The current <code>flow.ex.stream.fragment</code>, as denoted by the <code>ex</code>perimental tag, is a temporary implementation designed to get the concept of streams lowered to the HAL dialect. For streams to be effective at modeling larger concurrency scopes they need to be able to move across branches in the CFG. 
This intuitively follows exactly what one would do if recording commands in C:</p> <pre><code>vkCmdCopyBuffer(cmd, ...);\nif (some_flag) {\n  vkCmdBindPipeline(cmd, ..., pipeline_a);\n} else {\n  vkCmdBindPipeline(cmd, ..., pipeline_b);\n}\nvkCmdDispatch(cmd, ...);\n</code></pre> <p>The corresponding <code>flow</code> IR:</p> <pre><code>  flow.stream.append[%s0](...) {\n    flow.tensor.update ...\n  }\n  %b = arith.cmpi ne %some_flag, ...\n  cond_br %b, ^a(%s0), ^b(%s0)\n^a(%s1):\n  flow.stream.append[%s1](...) {\n    flow.dispatch @pipeline_a, ...\n  }\n  br ^end(%s1)\n^b(%s2):\n  flow.stream.append[%s2](...) {\n    flow.dispatch @pipeline_b, ...\n  }\n  br ^end(%s2)\n^end(%s3):\n  ...\n</code></pre> <p>This allows the entire stream to be lowered into one command buffer without the need for any host round-trips. The conversion into the <code>flow</code> dialect will walk the CFG and attempt to thread the <code>flow.stream</code> values through so long as there are no external dependencies.</p>"},{"location":"developers/design-docs/design-roadmap/#predication-of-flowdispatch","title":"Predication of <code>flow.dispatch</code>","text":"<p>While the <code>flow.stream</code> threading through the CFG can remove many of the simpler conditional dispatches there will always be some that will have their execution dependent on the result of prior dispatches. For these a <code>flow.cond_dispatch</code> will allow a condition to be provided that must be true for the dispatch to actually be performed.</p> <p>For targets that natively support predication in their command buffers (such as D3D12's ID3D12GraphicsCommandList::SetPredication) this provides a host round-trip-free way of conditionally executing dispatches and transfers. 
Unfortunately Vulkan support is still lacking, though Nvidia supports the VK_EXT_conditional_rendering extension that exposes the same behavior.</p> <p>For targets that do not support predication natively it's still possible to emulate predication with indirect dispatches. In this model the workgroup counts normally used to dispatch execution are sourced from another device buffer at the time the dispatch is made instead of sourced from the command buffer at the time the dispatch is recorded. Degenerate dispatches with counts of <code>0, 0, 0</code> allow for effective neutering of the dispatch with minimal overhead (vs. the significant penalty of a host round-trip!).</p> <p>By modeling such predication at the <code>flow</code> level we are able to lower into the HAL with target-aware predication semantics and fuse indirect dispatch workgroup count calculations into existing dispatches already being performed such that overhead is reduced.</p>"},{"location":"developers/design-docs/design-roadmap/#deduping-flowexecutables","title":"Deduping <code>flow.executable</code>s","text":"<p>While still in the <code>flow</code> dialect, the executables are target-agnostic. This makes simple IR tree diffing a potential solution to deduplication. Since most of the dispatches originate from the same source-language library calls in input frameworks there's a high likelihood of duplication, and depending on when inlining is performed we may have stronger or weaker ability to perform the deduplication. 
Thanks to the MLIR canonicalization pass (that ensures ops are rearranged into consistent canonical representations) the IR comparisons can be done rather trivially.</p>"},{"location":"developers/design-docs/design-roadmap/#rematerializing-csed-expressions","title":"Rematerializing CSE'd Expressions","text":"<p>Common subexpression elimination is performed many times during lowering, however there comes a point where the CSE can introduce false dependencies and additional allocations that are otherwise avoidable. For example if a broadcasting operation is CSE'd and then the result is used by two or more operations that are scheduled independently what would have been a relatively cheap lowering of the broadcast to a simple index remapping now becomes an additional dispatch, materialization of an intermediate tensor, and a barrier:</p> <pre><code>%bcast = \"mhlo.broadcast_in_dim\"(%cst) : (tensor&lt;f32&gt;) -&gt; tensor&lt;1024x10xf32&gt;\n%mul1 = mhlo.multiply %arg0, %bcast : tensor&lt;1024x10xf32&gt;\n// (pretend something here that prevents fusion)\n%mul2 = mhlo.multiply %arg1, %bcast : tensor&lt;1024x10xf32&gt;\n</code></pre> <pre><code>%bcast = flow.dispatch.region(%cst : tensor&lt;f32&gt;) -&gt; tensor&lt;1024x10xf32&gt; {\n  %0 = \"mhlo.broadcast_in_dim\"(%cst) : (tensor&lt;f32&gt;) -&gt; tensor&lt;1024x10xf32&gt;\n  return %0 : tensor&lt;1024x10xf32&gt;\n}\n// a barrier will be required here\n%mul1 = flow.dispatch.region(%arg0 : tensor&lt;1024x10xf32&gt;, %bcast : tensor&lt;1024x10xf32&gt;) -&gt; tensor&lt;1024x10xf32&gt; {\n  %1 = mhlo.multiply %arg0, %bcast : tensor&lt;1024x10xf32&gt;\n  return %1 : tensor&lt;1024x10xf32&gt;\n}\n%mul2 = flow.dispatch.region(%arg1 : tensor&lt;1024x10xf32&gt;, %bcast : tensor&lt;1024x10xf32&gt;) -&gt; tensor&lt;1024x10xf32&gt; {\n  %2 = mhlo.multiply %arg1, %bcast : tensor&lt;1024x10xf32&gt;\n  return %2 : tensor&lt;1024x10xf32&gt;\n}\n</code></pre> <p>Instead the broadcast should be rematerialized inside of both dispatch 
regions as the cost of doing so is significantly less in compute resources and then the intermediate tensor will not be required at all. Though at first it may seem counter-intuitive to undo such a critical optimization as CSE (both to code size and often to compute) but here it's something we must carefully balance while looking at the whole system. It gets even more important when considering multi-device execution as the cost of sharing memory and synchronizing may be extremely non-trivial.</p>"},{"location":"developers/design-docs/design-roadmap/#device-placement","title":"Device Placement","text":"<p>While still within the <code>flow</code> dialect we have the ability to easily split streams and safely shuffle around operations. Target execution backends can opt into such behavior to ensure that device restrictions such as maximum in-flight memory, maximum scheduling depth, and capabilities are observed. For heterogeneous configurations the intent is that certain operations, dispatches, and streams can be attributed to specify which device categories they should be lowered. The constraint solving that takes place can be provided with generic heuristics (\"big GEMMs go on the accelerator\"), profile-guided databases based on benchmarks, learned traits via ML, etc.</p>"},{"location":"developers/design-docs/design-roadmap/#hal-hardware-abstraction-layer-and-multi-architecture-executables","title":"<code>hal</code>: Hardware Abstraction Layer and Multi-Architecture Executables","text":"<p>As the IREE HAL is designed almost 1:1 with a compute-only Vulkan API many of the techniques classically used in real-time graphics apply. 
The benefit we have by modeling our usage of such a low-level API in IR is that the normal work - some of which is very non-trivial - for managing allocations, tracking resource lifetime, and ensuring proper synchronization/barriers is something we can apply the full force of an offline compiler against.</p>"},{"location":"developers/design-docs/design-roadmap/#allow-targets-to-specify-halinterfaces","title":"Allow Targets to Specify <code>hal.interface</code>s","text":"<p>The <code>hal.interface</code> op specifies the ABI between the scheduler and the device containing the buffer bindings and additional non-buffer data (parameters, shapes, specialization flags, etc). Today a na\u00efve ordering is used uniformly for all targets however it is possible for target backends to opt into providing their own interfaces based on target configuration. The same <code>hal.executable</code> may have multiple interfaces and the same backend may use one or more. This is useful for when target capabilities may vary at runtime, such as the number of available storage buffer bindings in Vulkan. By exposing a few <code>hal.interface</code> variants with different binding amounts the Vulkan backend could make better use of the larger number of bindings available at runtime while still providing support for smaller configurations.</p> <p>Once we have multiple <code>hal.interface</code>s defined for executables the scheduler needs to emit HAL ops that properly switch between them. By having a canonical form for bindings we can ensure that only the differences between the interfaces will need additional code.</p>"},{"location":"developers/design-docs/design-roadmap/#target-specific-scheduling-specialization","title":"Target-specific Scheduling Specialization","text":"<p>Though the <code>flow</code> dialect attempts to fuse as many ops as possible into dispatch regions, it's not always possible for all target backends to schedule a region as a single dispatch. 
A classic example is algorithms like parallel reduction commonly used on GPUs that may require many dispatches to identical executables, while other algorithms may vary the executables they use based on the input parameters such as shape or the target runtime device support.</p> <p>By default the <code>flow.dispatch</code> executable translation to <code>hal.executable</code>s is performed 1:1 and it is assumed that a single dispatch is required. Extending target backends with scheduling interfaces (enabling them to opt into different scheduling behavior) will allow the backends to emit any number of <code>hal.executable</code>s and any stream commands (such as additional dispatches or transfers) they may need. This is effectively equivalent to what would be done at runtime only because we are still operating on IR prior to buffer allocation and can use the <code>hal</code> ringbuffer primitive. Through this we can elide many of the allocations that would otherwise be required at runtime (and the concurrency-limiting false dependencies that usually come along with scratch memory).</p> <p>Since the algorithm used may vary based on the parameters of the dispatch (such as the shape of the reduction which may be dynamically determined) scheduling specialization may occur even when targeting a single backend. In many cases folding and canonicalization can eliminate the overhead as whether one dynamically computed workgroup size is used instead of another the same IR is present.</p>"},{"location":"developers/design-docs/design-roadmap/#buffer-usage-tracking","title":"Buffer Usage Tracking","text":"<p>Many explicit hardware APIs require knowing how buffers are used alongside with where they should be located. For example this additional information determines caching policy on buffer accesses (write-through, write-back, etc), visibility of writes across compute units, and the possible MMU properties that may need to be maintained/matched for the buffer. 
By using the SSA-form value-semantics of the MLIR <code>tensor</code> as used in the <code>flow</code> dialect we have complete information of where buffers may be used or at least where they enter or leave regions where we can derive such information.</p> <p>Analysis passes can run over IR to attribute tensors such that when allocation is performed when lowering to the <code>hal</code> dialect we do so from an allocator compatible with where the buffer will be used, with memory types chosen based on the potential cost and location of operations performed (write-only on host vs. read-write on host and device, etc), and with usage bits indicating what kind of operations may be performed on the buffer. Many of these are local transformations as most buffers are only live within very small regions such as the <code>flow.stream</code> encompassing their usage.</p> <p>Traditional systems need to either use very permissive buffer properties or heuristics that can introduce additional non-trivial overhead when such heuristics are incorrect. For example, OpenGL had several such usage hints that drivers were then able to use but almost no drivers behaved as desired in all cases and it led to additional memory ghosting, copies, readbacks, and unpredictable performance. For almost all uses of the buffers within an IREE invocation we instead can know precisely where and how buffers may need to be moved and do it a minimum number of times if it is required.</p>"},{"location":"developers/design-docs/design-roadmap/#batched-executable-caching-and-precompilation","title":"Batched Executable Caching and Precompilation","text":"<p>For targets that may require runtime preprocessing of their executables prior to dispatch, such as SPIR-V or MSL, the IREE HAL provides a caching and batch compilation mechanism based on Vulkan's Pipeline Cache.</p> <p>Today each executable is compiled on-demand and cached only for the process lifetime. 
Though some drivers may provide their own caching we can make better use of the explicit caching and compilation behavior with the additional information we have in the compiler.</p> <p>For any given entry point (or group of entry points) into an IREE module we can perform reachability analysis to know which executables may be executed when that entry point is invoked. In this way we can emit pre-invocation compilation checks (similar to an <code>std::call_once</code> block) that provide all required executables for compilation and allow more efficient compilation through multithreading the compiler invocations. This same compilation caching function can be exposed and invoked manually by an application to force pre-compilation when it is least likely to impact the user, such as a post-install/first-run step or concurrently while other application features are loading.</p> <p>We can use zero or more scoped caches for executables within a module. Completely dynamic modules (such as those emitted in eager-mode usage) may avoid the caching overhead entirely, while modules that have several primary usage modes (such as training and inference) may choose to use independent caches for each such mode.</p> <p>The caches generated can then be retrieved and saved by the hosting application. Upon the next execution the application can provide the caches and if still valid they will be used to avoid compilation.</p>"},{"location":"developers/design-docs/design-roadmap/#target-aware-executable-compression","title":"Target-aware Executable Compression","text":"<p>An advantage of representing executable binaries in IR after translation is that we can apply various post-compilation compression and minification techniques while still knowing precisely where the executable will be used. This is extremely important for SPIR-V as it is not designed to be a small at-rest format. 
Though the biggest lever we have to control generated code size is higher-level deduplication and specialization there will still be a sufficiently large number of executable binaries we will need to embed within the final modules and having targeted approaches for reducing their size beyond just \"gzip everything\" is very powerful.</p> <p>For example, SMOL-V is a fantastic lossless SPIR-V compression technique that, when coupled with modern dictionary-based compression algorithms, can save significant binary size. As a data point, the SPIR-V corpus SMOL-V uses for testing goes from 4.8MiB of raw SPIR-V to 348KiB of compressed SMOL-V.</p> <p>Combined with Batched Executable Caching and Precompilation we can easily use shared dictionaries and other cross-artifact compression in a relatively plug-in way.</p>"},{"location":"developers/design-docs/design-roadmap/#target-aware-constant-compression","title":"Target-aware Constant Compression","text":"<p>It's still an area that needs more research but one goal of the IREE design was to enable efficient target- and context-aware compression of large constants (typically model weights/parameters/embeddings). This may mean reusing existing hardware compression formats on GPUs, ML accelerator-specific formats, or very-low-bit-depth (1-4 bit per value) quantization techniques that cannot be directly used without first decompressing. The inspiration here is formats like Crunch and Basis Universal that perform \"supercompression\", and we may even be able to use these directly as then we can make use of GPU hardware samplers to do the 4-bit to 32-bit decompression, etc.</p>"},{"location":"developers/design-docs/design-roadmap/#command-buffer-stateful-deduplication","title":"Command Buffer Stateful Deduplication","text":"<p>The IREE HAL - much like Vulkan it is based on - eschews much of the state that traditional APIs have in favor of (mostly) immutable state objects (pipeline layouts, pipeline states, descriptor sets, etc). 
There are still a few stateful entry points in the API, though, and deduplicating or reordering redundant calls can reduce IR, API, and execution overhead.</p> <p>The key place this will have the largest impact is around descriptor set bindings and push descriptors, both of which are state and can have non-trivial setup overhead. A canonicalization for such commands that inspects the target <code>hal.command_buffer</code> to see if the same state was set prior and code motion to move such commands out of loop bodies when possible would be helpful.</p>"},{"location":"developers/design-docs/design-roadmap/#resource-timeline","title":"Resource Timeline","text":"<p>A core concept of the IREE scheduler that allows for overlapping in-flight invocations is that of the resource timeline. This identifies module state that can be in use by multiple invocations and assigns timeline milestones denoting when the resource will be in the appropriate state for the current invocation to proceed. Conceptually it is like an epoch-based synchronization mechanism as commonly found in garbage collectors to allow for lock-free asynchronous memory reclamation.</p> <p>The advantage we have in the IR is that we know both the usage of all resources thanks to buffer usage tracking and the synchronization domains of all resources (in most cases). This allows us to effectively assign one timeline semaphore per writeable resource while in practice having far fewer than 1:1, as for example if two resources are only ever written in the same command buffer only one semaphore is needed to signal the completion of both writes.</p> <p>By transforming IR to sink all resource reads and writes closest to where the value is used we can enlarge the time windows that can overlap across invocations that may share those resources. 
This is similar to what out-of-order CPUs do with register renaming/reorder buffers/etc and something we can apply some traditional instruction scheduling techniques to (only here our 'instructions' are entire command buffer dispatches/transfers).</p> <p>Two degenerate cases of this approach are that of resource indirection (<code>util.ptr&lt;tensor&lt;T&gt;&gt;</code>) and dynamic resource shapes. In these two cases it may not be possible to continue recording commands even if we are able to ensure execution is appropriately synchronized. This is where indirect dispatch, predication, indirect command buffers, and VM coroutines can all help cover for the times where we are unable to transform away the indirection or emit shape logic without data dependencies.</p>"},{"location":"developers/design-docs/design-roadmap/#transient-tensor-ringbuffer","title":"Transient Tensor Ringbuffer","text":"<p>(When properly implemented) almost all buffers required during execution never escape the command buffers they are used in or a single VM invocation. We can trivially identify this from the explicit captures of <code>flow.stream</code> and <code>flow.dispatch</code> ops and the fact that all tensor types have value-semantics. Only those tensor values loaded-from/stored-to module state or that cross the exported module function boundary need special consideration while almost everything else can live transiently only so long as it is required during execution.</p> <p>Thanks to this information about buffer usage and lifetime we can use a ringbuffer to store the transient tensor data and other required data reservations such as uniform buffers used to pass dynamic parameters (shapes, flags, etc) into dispatches. 
This gives the compiler and the application a knob that allows them to control maximum concurrency (by having a very large ringbuffer) or maximum memory usage (by having a minimally small ringbuffer).</p> <p>Allocating tensors from the ringbuffer does not require sophisticated runtime packing as we can emit IR to calculate required sizes for dynamically shaped tensors. Whether a basic block reserves <code>%sz = arith.constant 42 : index</code> bytes or <code>%sz = std.muli %cst, %dyn_dim : index</code> bytes doesn't materially change how the allocations are performed. Since almost all usage involves simple write head bumps there is no need for ahead-of-time memory planning or large fixed allocations, and since no buffer within the ringbuffer can alias we can have coarse (read: low overhead) guarantees about the availability of certain regions of the ringbuffer (\"when this event is signaled all prior ringbuffer writes have completed\").</p> <p>Usually any planning we may want to perform can be done in IR via code motion. For example applying traditional algorithms used to reduce register pressure will help us attain narrower live windows within the ringbuffer leading to a larger number of in-flight operations for the same ringbuffer memory usage.</p> <p>We may end up using both a classical ringbuffer and a variant known as the bip buffer because it is better for descriptor set utilization (as we can provide many dispatch parameters with a single base offset bound once at the beginning of a region).</p>"},{"location":"developers/design-docs/design-roadmap/#timeline-semaphores-on-the-module-abi","title":"Timeline Semaphores on the Module ABI","text":"<p>Function calls made across modules (either from C++ into the VM, VM-&gt;VM, or VM-&gt;C++) should be able to define timeline semaphores used to wait and signal on the call. 
We can do this by making all exports automatically have the semaphores and then make invocations populate them if they were not provided by the caller. In this way we can allow multiple invocations of exported functions to chain naturally with internal asynchronous workloads, turning most IREE invocations into just recording of command buffers that can never block.</p> <p>When combined with VM coroutine support we even have the ability to interleave any required host execution between the wait and signal semaphores provided such that the caller never knows on which device execution is taking place. It's still possible to provide synchronous wrappers that emulate blocking behavior but by having the core system designed around a single system-supported primitive we avoid the need for additional things like interrupt watchdog threads, implicit blocking, and other pitfalls.</p>"},{"location":"developers/design-docs/design-roadmap/#gpu-like-cpu-scheduling","title":"GPU-like CPU Scheduling","text":"<p>One approach to using multiple cores on a CPU is to perform interior parallelization of operations such as OpenMP or library-call-based custom thread pools (gemmlowp). This works when each individual operation is relatively costly vs. potential pipeline bubbles caused by work spinning down near the end of an operation and spinning up at the beginning of the next.</p> <p>IREE is designed to handle many more workloads - some of which have very narrow shapes but very deep pipelines (like search algorithms) - such that the above approach of multithreading within ops becomes a bottleneck. 
These workloads are traditionally very poorly handled by frameworks and issues with oversubscription, pipeline stalls, and suboptimal system schedulers (such as on Android) can lead to more time being spent thrashing about than actually executing real work.</p> <p>The approach we take here is to treat the cores of a CPU as if they were computation units on a GPU, each able to perform some set of heterogeneous work independent of others units. This means that the concurrency we are trying to model at the <code>flow</code> level and communicate to the runtime via the <code>hal</code> that explicitly states which dispatches can overlap and the size of the workgroups can trivially be used to distribute this work over many cores exactly as a GPU would do it. Integration with library calls that may require their own threading (such as Ruy) requires that they be able to use the IREE thread pool instead of their own.</p> <p>In this way we can avoid pipeline bubbles and other latency-inducing unpredictable scheduling. This does not mean that we treat individual units of work at the same scale as we would for GPUs, but instead that we tile and have one or more processing units that allows us to work on those tiles. Whether the tile size is defined by a library call contract, heuristics, or empirically is TBD, but expect workgroup sizes in the thousands to millions of invocations vs. normal GPU workgroup sizes in the dozens to hundreds of invocations.</p> <p>To achieve this style of scheduling efficiently we'll likely use something like marl as the scheduler. 
Marl provides cross-platform low-overhead fibers and is compatible with this style of scheduling as it was built for the Swiftshader software rasterizer.</p> <p>Even if IREE was only targeting CPUs the assertion is that we would still want to schedule this way and it's only an incidental benefit that if building for heterogeneous targets the scheduling code may be shared (just with a different divisor for workgroup count calculations).</p>"},{"location":"developers/design-docs/design-roadmap/#vm-lightweight-virtual-machine","title":"<code>vm</code>: Lightweight Virtual Machine","text":"<p>The VM is designed as a dynamic linkage ABI, stable bytecode representation, and intermediate lowering IR. Many of the optimizations we can perform on it will benefit all use cases (such as when lowering to LLVM IR) by allowing higher-level program transformations around synchronization that are difficult to perform on arbitrary LLVM IR.</p>"},{"location":"developers/design-docs/design-roadmap/#coroutines-for-batching-and-cooperative-scheduling","title":"Coroutines for Batching and Cooperative Scheduling","text":"<p>One of the largest features currently missing from the VM is coroutines (aka user-mode fiber scheduling). Coroutines are what will allow us to have multiple in-flight invocations into a module - some of which may be waiting on external events - without the need for complex multithreading logic or state machine machinations.</p> <p>In many cases once semaphores are exposed to callers we will not need to yield in the VM. The user will call into the module with provided semaphores, the work to perform will be recorded to one or more command buffers and submitted to the device, and then control will return to the caller immediately.</p> <p>In cases requiring host readbacks that we were not able to remove, however, additional VM code may need to run prior to when the final semaphore is signaled. 
To preserve the asynchronous interface and immediate execution guarantees the compiler can emit explicit yield points (<code>vm.yield</code>) that are known-good locations for yielding (such as most resources not required after the yield having been flushed/discarded, partial synchronization scope availability if other work may be able to execute concurrently irrespective of the yielded coroutine, etc).</p> <p>When the VM encounters the yield at runtime it will suspend the coroutine until a defined condition is met. Many coroutines can be in various states at any given time and - thanks to the resource timeline - can still be memory safe. For example if two stateless invocations are made with a common wait semaphore both can be recorded and submitted without waiting on each other. If there is internal module state accessed the invocations are implicitly ordered by invocation order (similar to what Vulkan calls API order) based on internal resource timeline semaphores.</p> <p>Waking the coroutines can be performed by either an application-provided callback in the case of the application already having a periodic event which is doing bookkeeping (such as frame end callbacks when rendering or Looper idle events on Android), giving direct control over the frequency and location which IREE utilizes to perform additional work. A helper will be provided as well that runs a dedicated IREE thread to do this, but the expectation is that applications can often do a better (and importantly more predictable) job.</p> <p>By utilizing coroutines IREE will have a way to fill traditional pipeline bubbles even with execution from the same module (let alone across modules) in the situation where host readbacks or other logic is required. 
This increases overall throughput and utilization while reducing host wakeups as many coroutines can be processed at once to submit new work to the device queues, though it does not help reduce per-invocation latency.</p> <p>External code such as the HAL implementation or user ops may provide the wait handles used for continuation. For example, the HAL can expose a function that yields and wakes only when one or more timeline semaphores reach their target values:</p> <pre><code>// submit work\nhal.device.yield %semaphore4 &gt;= %sem4_target, %semaphore5 &gt;= %sem5_target\n// continue here, possibly much later in time\n</code></pre>"},{"location":"developers/design-docs/design-roadmap/#cellular-batching","title":"Cellular Batching","text":"<p>Though coroutines help throughput there is a way we've found to reduce latency that's been documented as cellular batching. This same technique has been implemented in prior internal systems and is one of the motivating design goals for IREE's creation. The core idea is to identify small uniform work that can be partitioned and scheduled greedily such as to enable batching or reduce associated invocation costs (such as refreshing accelerator SRAM/caches with new parameters). This usually manifests as finding large GEMM/GEMV operations using the same fixed parameters and either dynamically increasing the batch size by adding the waiting work (without deferring the actual execution time) or sequencing them back to back to ensure better cache utilization. Which approach is taken depends on any data dependencies that may be present (such as LSTM state feedback edges).</p> <p>With the foundation of coroutines in IREE it's possible to yield execution at any given point - including during command buffer recording - and wake on specific conditions. 
A majority of the logic can be built into the module itself with very little need for runtime machinery, as shared VM variables can be used to track pending work across invocations (even from different parts of the program) and flush based on logic wholly controlled by the user or compiler (such as count/max time latency/etc limits). This allows for the large variety of scheduling behavior various applications may want to use, such as a zero-latency batch-only-within-this-invocation to a Nagle's Algorithm-esque time or limit based behavior or even some learned model-specific windowing.</p> <p>Design work is still required on how to represent this in IR but the current thought is to model the regions in which deferred execution is possible and beneficial and allow during lowering to the VM additional transformations. This is similar to how the async-await behavior works in C# where the async keyword is just sugar that expands to additional generated helper utilities.</p> <p>A simple strawman representation for sequential dispatch may look like:</p> <pre><code>hal.scheduling_policy @defer_policy {\n  // max time, max count, max live memory, etc\n}\n...\nhal.command_buffer.dispatch.deferred @defer_policy, @dispatch, ...\n// vm.yield added here during lowering\n</code></pre> <p>There are many cases to explore and as cellular batching can have performance benefits of several orders of magnitudes it'll be one of the primary areas of research in the long-term.</p>"},{"location":"developers/design-docs/design-roadmap/#lowering-to-llvm-ir","title":"Lowering to LLVM IR","text":"<p>For scenarios where dynamic module loading is not required and entire modules can be compiled into applications we can lower the VM IR to LLVM IR within MLIR's transformation pipeline. Instead of embedding <code>vm.call</code> ops that are dispatched at runtime to things like the HAL we can instead lower to <code>llvm::CallInst</code> to runtime-resolved function pointers. 
This still enables all of the flexibility of heterogeneous/runtime-determined devices, pluggable diagnostics, and backend composition without any need for FlatBuffers or the VM bytecode interpreter.</p> <p>The VM was designed to make such a lowering easy and the C-style struct-based function pointer registration for runtime modules was designed to make emitting code that used it fairly robust even when linked in dynamically such as when embedded in shared objects.</p> <p>An extension of this is what we've been calling 'runtimeless mode', where the IREE VM linkage code is statically linked into the binary alongside the generated module LLVM IR. If only a single HAL backend is linked in then (with some build-fu) we should be able to get call devirtualization to reduce code size to precisely the functionality used by the module.</p>"},{"location":"developers/design-docs/design-roadmap/#improved-type-support","title":"Improved Type Support","text":"<p>Currently the VM only supports two types: <code>i32</code> and <code>vm.ref&lt;T&gt;</code>. This is an intentional limitation such that we can determine what is really needed to express the scheduling we perform, with the idea being that such a limited model will make it easier to use techniques like indirect command buffers to compile the VM itself to an accelerator executable that dispatches work without host involvement.</p> <p>As we port more models we may find a few primitives that are worth bringing into the VM design such that it's worth potential complications to future porting. 
These include types like <code>f32</code> (for simple float calculations/comparisons), <code>list</code>/<code>dict</code> (easier python compatibility), and <code>vector&lt;4xf32&gt;</code> (for simple inline calculations that are not worth dispatch overhead/synchronization).</p>"},{"location":"developers/design-docs/design-roadmap/#indirect-command-bufferon-accelerator-execution","title":"Indirect Command Buffer/On-Accelerator Execution","text":"<p>Though IREE will use many different tricks such as predication to build deep pipelines there is still the requirement that the command recording and submission happens on the host CPU. Though the cost of this in terms of latency and power use can be minimized by coalescing and timelines there is still the possibility of non-trivial roundtrips being introduced that limit performance. For particular applications like low-power always-on compute or where there is significantly branchy behavior (such as search algorithms) it is important that the decision making logic as to what is dispatched runs as close to real-time as possible within the execution pipeline.</p> <p>The IREE VM is designed to be runnable on-device in a secure and cooperative way (no pointers, indirect buffer handles to allow for memory space rearrangement op-to-op, deterministic execution and explicit yield points, etc).</p> <p>The recent efforts to bring indirect command buffers to Vulkan and Metal's Indirect Command Buffers (that both derive inspiration from NV_command_list) are one such target for this. Either by lowering the VM IR to LLVM IR or SPIR-V, by a special conversion to target-specific forms, or by actually executing the VM bytecode directly on-device (it's ~1000 LoC) we should be able to prototype what full on-device usage is like. 
Even if only some VM functions the compiler deems useful to schedule on the device are used and the rest run on the host (particularly those functions calling imported functions) some of the most costly logic that creates tight coupling of the host and device scheduling can be limited.</p>"},{"location":"developers/design-docs/function-abi/","title":"Function ABI","text":"<p>Note</p> <p>Authored December, 2019</p> <p>Updated August, 2021</p> <p>A key job of the IREE compiler and runtime is capturing function call semantics from the originating system and providing mechanisms so that invocations can be performed in as similar way as possible in various target languages. In general, this requires additional metadata on top of the raw characteristics of a function. Where possible, this is done by attaching attributes to a function.</p> <ul> <li><code>iree.abi</code> : JSON encoded description of the function's calling convention.</li> </ul>"},{"location":"developers/design-docs/function-abi/#v1-abi","title":"V1 ABI","text":"<p>This is the default ABI supported by the IREE VM invocations. 
It attempts to provide a default calling convention that can be used without further reflection metadata but which may be enhanced with it.</p> <p>It natively allows monomorphic functions to be exported where arguments and results are composed of the following types:</p>"},{"location":"developers/design-docs/function-abi/#value-types","title":"Value Types:","text":"<ul> <li>Byte aligned integer type (i8, i16, i32, i64)</li> <li>Floating point value (f16, f32, f64)</li> </ul>"},{"location":"developers/design-docs/function-abi/#reference-types","title":"Reference Types:","text":"<ul> <li> <p>ND-Array buffers of Value Types:</p> <ul> <li>Simple: Packed, C-layout</li> <li>Strided: Arbitrary layout with strides (future)</li> </ul> </li> <li> <p>String (byte arrays)</p> </li> <li> <p>Opaque reference object</p> </li> </ul>"},{"location":"developers/design-docs/function-abi/#sequence-types","title":"Sequence Types:","text":"<ul> <li>Tuples: fixed length lists where each position has its own type bound</li> <li>Homogenous list: lists of arbitrary size where a single type bound applies     to all elements</li> </ul> <p>The intent with these low level types is that calling conventions can be synthesized to bind arbitrary high level, domain/language specific signatures to these types, possibly by way of additional reflection metadata.</p>"},{"location":"developers/design-docs/function-abi/#representations","title":"Representations:","text":"<p>The above are all representable with native constructs in the VM:</p> <ul> <li> <p>ValueType:</p> <ul> <li>Runtime:     <code>iree_vm_value</code></li> <li>Compile Time: primitive MLIR integer/floating point types</li> </ul> </li> <li> <p>Simple ND-Array Buffer:</p> <ul> <li>Runtime:     <code>iree_hal_buffer_view</code></li> <li>Compile Time: <code>tensor&lt;&gt;</code></li> </ul> </li> <li> <p>String:</p> <ul> <li>Runtime:     <code>iree_vm_list</code>     containing <code>i8</code></li> <li>Compile Time: 
<code>!util.list&lt;i8&gt;</code></li> </ul> </li> <li> <p>Tuple:</p> <ul> <li>Runtime:     <code>iree_vm_list</code>     of variant</li> <li>Compile Time: <code>!util.list&lt;?&gt;</code></li> <li>Note that these are statically type erased at the boundary.</li> </ul> </li> <li> <p>TypedList (homogenous):</p> <ul> <li>Runtime:     <code>iree_vm_list</code>     of <code>T</code></li> <li>Compile Time: <code>!util.list&lt;T&gt;</code></li> </ul> </li> </ul>"},{"location":"developers/design-docs/function-abi/#extended-type-calling-conventions","title":"Extended Type Calling Conventions","text":"<p>While the above features of the native ABI may be sufficient for direct use by various programs, many programs and callers will need to represent various higher level types, consistently mapping them to the above facilities. This section describes calling conventions for various higher level types which do not map 1:1 to the above. Not all source language types are representable, and extending these calling conventions (and the fundamental types above) is demand driven.</p> <p>All of these calling conventions presume that the arity of the arguments/results of the raw function matches the user-level function, meaning that the calling convention is specified per argument/result. Higher-level whole function transformations may also exist for some domains but are outside of the scope of this specification.</p>"},{"location":"developers/design-docs/function-abi/#structure","title":"Structure","text":"<p>A <code>Structure</code> is a common enough entity to have a dedicated calling convention. In C-like languages, this may just be a <code>struct</code>. In Python, it is typically a <code>dict</code> with an associated schema providing a name and type bound for each of its slots. In both, its slots are of fixed arity.</p> <p>In this convention, such a structure is represented as a <code>Tuple</code> in the native calling convention (i.e. <code>!util.list</code> of variant type). 
The order of the elements of the tuple are the natural order of the structure, where that is either:</p> <ul> <li>For a C-like system where order is determinate, it is the order of     declaration.</li> <li>For a name-based system (i.e. bind to <code>dict</code>) where no order is defined, the     natural order will be the lexically sorted order of the keys.</li> </ul>"},{"location":"developers/design-docs/function-abi/#string","title":"String","text":"<p>Most languages interop between byte arrays (i.e. the native ABI <code>String</code> type) by way of applying an encoding. Such strings are just a sequence of bytes (i.e. <code>!util.list&lt;i8&gt;</code>).</p>"},{"location":"developers/design-docs/function-abi/#typed-list","title":"Typed List","text":"<p>High level lists which all share the same type bound are represented as a <code>TypedList</code> in the native ABI.</p>"},{"location":"developers/design-docs/function-abi/#ndarray-of-reference-types","title":"NDArray of Reference Types","text":"<p>NDArrays of reference types are considered separately from those of value types. Internally, the code generated for them is completely different from what gets generated for numeric based arrays (i.e. has ref-counting, ownership semantics, non-POD, etc). These types are permitted for completeness, not necessarily performance: by nature they are already indirected and have overheads.</p> <p>In the native ABI, these are represented as a composite tuple type (i.e. today a list since sugar for tuple is not yet defined): <code>!iree.tuple&lt;!util.list&lt;T&gt;, !util.list&lt;index&gt;&gt;</code>. The first element of the tuple is the list of values, packed with a C-Layout and the second element is the list of dimension sizes.</p>"},{"location":"developers/design-docs/function-abi/#reflection","title":"Reflection","text":"<p>Additional reflection metadata may be encoded in a custom JSON form, providing additional typing hints for arguments and results. 
If present, this will be a reflection attribute with key <code>d</code>, containing a serialized JSON object.</p> <p>The JSON object contains:</p> <ul> <li><code>a</code> (array): List of type records for each argument.</li> <li><code>r</code> (array): List of type records for each result.</li> </ul> <p>Type records are one of:</p> <ul> <li> <p>A string naming a primitive type:</p> <ul> <li><code>i[0-9]+</code>: Integer type with given bit width</li> <li><code>f[0-9]+</code>: IEEE floating point type with given bit width</li> <li><code>bf16</code>: BFloat16</li> </ul> </li> <li> <p>JSON <code>null</code>: A null reference value</p> </li> <li> <p><code>\"unknown\"</code>: An unknown/unmapped type</p> </li> <li> <p>An array, interpreted as a tuple describing a compound type.</p> </li> </ul>"},{"location":"developers/design-docs/function-abi/#compound-type-tuples","title":"Compound type tuples","text":"<p>A compound type tuple has a type identifier as its first element, followed with type specific fields:</p> <ul> <li><code>[\"named\", \"key\", {slot_type}]</code>: Associates a name with a slot. This is     used with the root argument list to denote named arguments that can be     passed positionally or by keyword.</li> <li><code>[\"ndarray\", {element_type}, {rank}, {dim...}]</code>: For unknown rank, the     <code>rank</code> will be <code>null</code> and there will be no dims. Any unknown dim will be     <code>null</code>.</li> <li><code>[\"slist\", {slot_type...}]</code>: An anonymous structured list of fixed arity and     slot specific types. If there are gaps in the list, empty slots will have a     <code>null</code> type.</li> <li><code>[\"stuple\", {slot_type...}]</code>: Same as <code>slist</code> but some languages     differentiate between sequences represented as lists and those represented     as tuples (read-only lists).</li> <li><code>[\"sdict\", [\"key\", {slot_type}]...]</code>: An anonymous structure with named     slots. 
Note that when passing these types, the keys are not passed to the     function (only the slot values).</li> <li><code>[\"py_homogeneous_list\", {element_type}]</code>: A Python list of unknown size     with elements sharing a common type bound given by <code>element_type</code>.</li> </ul>"},{"location":"developers/design-docs/hip-hal-driver/","title":"HIP HAL driver","text":"<p>This document lists technical details regarding the HIP implementation of IREE's Hardware Abstraction Layer, called a HIP HAL driver.</p> <p>IREE provides a Hardware Abstraction Layer (HAL) as a common interface to different compute accelerators. IREE HAL's design draws inspiration from modern GPU architecture and APIs; so implementing a HAL driver using HIP is mostly straightforward; though there are places we need emulation given no direct mapping concepts or mechanisms. HIP HAL driver draws inspiration from the CUDA HAL driver and the code structure is based off of that implementation.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#overall-design-choices","title":"Overall design choices","text":"","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#hip-driver-vs-runtime-api","title":"HIP driver vs runtime API","text":"<p>IREE HAL's design draws inspiration from modern GPU APIs--it provides explicit control of low-level GPU objects. The compiler is expected to plan the object lifetime and schedule workload and synchronization in an optimized way; IREE HAL implementation and the underlying GPU driver stack is expected to be a thin layer without much smarts and magic.</p> <p>Unlike CUDA, HIP doesn't provide two separate API's with the same functionality in the name of driver and runtime. 
Instead it extends the HIP API with Modules and Ctx control API's that the CUDA driver API's exclusively offer. At runtime the HIP HAL driver will load the <code>libamdhip64.so</code>/<code>amdhip64.dll</code> library dynamically.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#gpu-objects","title":"GPU Objects","text":"","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#driver","title":"Driver","text":"<p>There is no direct HIP construct that maps to the IREE HAL <code>iree_hal_driver_t</code> abstraction. We use it to hold the dynamic symbols loaded for all devices, and device enumeration and creation.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#device","title":"Device","text":"<p><code>iree_hal_hip_device_t</code> implements <code>iree_hal_device_t</code> to provide the interface to HIP GPU device by wrapping a <code>hipDevice_t</code>. For each device, right now we create two <code>hipStream_t</code>s--one for issuing commands for memory allocation and kernel launches as instructed by the program; the other for issuing host callback functions after dispatched command buffers complete. 
See synchronization section regarding the details.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#async-allocation","title":"Async allocation","text":"<p>The HIP HAL driver supports async allocation (<code>iree_hal_device_queue_alloca()</code> and <code>iree_hal_device_queue_dealloca()</code>) via HIP stream ordered memory allocation.</p> <p>The <code>async_allocations</code> in the <code>iree_hal_hip_device_params_t</code> struct allows to enable this feature.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#command-buffer","title":"Command buffer","text":"<p><code>iree_hal_command_buffer_t</code> is a recording of commands to issue to the GPU; when the command buffer is submitted to the device it's then actually executed on the GPU asynchronously.</p> <p>Two implementations of <code>iree_hal_command_buffer_t</code> exist in the HIP HAL driver--one backed by <code>hipGraph_t</code> and the other backed by <code>hipStream_t</code>.</p> <p><code>hipGraph_t</code> conceptually matches <code>iree_hal_command_buffer_t</code> better given it's a recording of commands to issue to the GPU. Also using the <code>hipGraph_t</code> API allows to easily encode fine grain dependencies between dispatch without having to create multiple streams. Therefore, the <code>hipGraph_t</code>-backed implementation is a more natural one. Though note that <code>hipGraph_t</code> API is meant to be used for recording once and replaying multiple times and there may be a performance penalty to using <code>hipGraph_t</code> API for one-shot command buffer.</p> <p>The <code>hipStream_t</code>-backed implementation just issues commands directly to a <code>hipStream_t</code> when recording. Commands issued to <code>hipStream_t</code> can be immediately sent to the GPU for execution; there is no recording and replaying separation. 
In order to match the recording semantics of <code>iree_hal_command_buffer_t</code>, to use the <code>hipStream_t</code>-backed command buffer, we need to first record the command buffer into an in-memory <code>iree_hal_deferred_command_buffer_t</code>, and then when applying the command buffer, we create a new <code>hipStream_t</code>-backed implementation.</p> <p>The <code>command_buffer_mode</code> in the <code>iree_hal_hip_device_params_t</code> struct allows to select which implementation to use.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#allocator","title":"Allocator","text":"<p>The allocator will forward allocation requests to <code>hipHostMalloc()</code> for host local memory, <code>hipMalloc()</code> for device local and host invisible memory, and <code>hipMallocManaged()</code> for device local and host visible memory.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#buffer","title":"Buffer","text":"<p>HIP buffers are represented either as a host pointer or a device pointer of type <code>hipDeviceptr_t</code>.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#executable","title":"Executable","text":"<p><code>iree_hal_executable_t</code> maps naturally to <code>hipModule_t</code>.</p> <p>The compiler generates a FlatBuffer containing a HSACO image as well as a list of entry point functions and their associated metadata (names, workgroup size, dynamic shared memory size, etc.). 
At runtime, the HIP HAL driver loads the HSACO image and creates <code>hipFunction_t</code>s out of it for various entry points.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#synchronization","title":"Synchronization","text":"","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#event","title":"Event","text":"<p><code>iree_hal_event_t</code> right now is not used in the compiler so it's not yet implemented in the HIP HAL driver.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#semaphore","title":"Semaphore","text":"<p>The IREE HAL uses semaphores to synchronize work between host CPU threads and device GPU streams. It's a unified primitive that covers all directions--host to host, host to device, device to host, and device to device, and allows flexible signal and wait ordering--signal before wait, or wait before signal. There is no limit on the number of waits of the same value too.</p> <p>The core state of a HAL semaphore consists of a monotonically increasing 64-bit integer value, which forms a timeline--signaling the semaphore to a larger value advances the timeline and unblocks work waiting on some earlier values. The semantics closely mirrors Vulkan timeline semaphore.</p> <p>In HIP, there is no direct equivalent primitives providing all the capabilities needed by the HAL semaphore abstraction. Therefore, to implement the support, we need to leverage multiple native CPU or HIP primitives under the hood.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#hipevent_t-capabilities","title":"<code>hipEvent_t</code> capabilities","text":"<p>The main synchronization mechanism is HIP event--<code>hipEvent_t</code>. 
As a functionality and integration baseline, we use <code>hipEvent_t</code> to implement the IREE HAL semaphore abstraction.</p> <p><code>hipEvent_t</code> natively supports the following capabilities:</p> <ul> <li>State: binary; either unsignaled or signaled. There can exist multiple   waits (e.g., via <code>hipEventSynchronize()</code> or <code>hipGraphAddEventWaitNode()</code>) for   the same <code>hipEvent_t</code> signal (e.g., via <code>hipEventRecord()</code> or   <code>hipGraphAddEventRecordNode()</code>).</li> <li>Ordering: must be signal before wait. Waiting before signal would mean   waiting an empty set of work, or previously recorded work.</li> <li>Direction: device to device, device to host.</li> </ul> <p>We need to fill the remaining capability gaps. Before going into details, the overall approach would be to:</p> <ul> <li>State: we need a 64-bit integer value timeline. Given the binary state of   a <code>hipEvent_t</code>, each <code>hipEvent_t</code> would just be a \"timepoint\" on the timeline.</li> <li>Ordering: we need to defer releasing the workload to the GPU until the   semaphore waits are reached on the host, or we can have some device   <code>hipEvent_t</code> to wait on.</li> <li>Direction: host to host and host to device is missing; we can support that   with host synchronization mechanisms.</li> </ul>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#signal-to-wait-analysis","title":"Signal to wait analysis","text":"<p>Concretely, for a given HAL semaphore, looking at the four directions:</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#cpu-signal","title":"CPU signal","text":"<p>A CPU thread signals the semaphore timeline to a new value.</p> <p>If there are CPU waits, it is purely on the CPU side. We just need to use common CPU notification mechanisms. In IREE we have <code>iree_event_t</code> wrapping various low-level OS primitives for it. 
So we can just use that to represent a wait timepoint. We need to keep track of all CPU wait timepoints in the timeline. After a new signaled value, go through the timeline and notify all those waiting on earlier values.</p> <p>If there are GPU waits, given that there are no way we can signal a <code>hipEvent_t</code> on CPU, one way to handle this is to cache and defer the submission batches by ourselves until CPU signals past the desired value. To support this, we would need to implement a deferred/pending actions queue.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#gpu-signal","title":"GPU signal","text":"<p>GPU signals can only be through a <code>hipEvent_t</code> object, which has a binary state. We need to advance the timeline too. One way is to use <code>hipLaunchHostFunc()</code> to advance from the CPU side with <code>iree_hal_semaphore_list_signal()</code>. This additionally would mean we can reuse the logic form CPU signaling to unblock CPU waits.</p> <p>After advancing the timeline from the CPU side with <code>hipLaunchHostFunc()</code>, we can release more workload from the deferred/pending actions queue to the GPU. Though, per the documentation of <code>hipLaunchHostFunc()</code>, \"the host function must not make any HIP API calls.\" So we cannot do that directly inside <code>hipLaunchHostFunc()</code>; we need to notify another separate thread to call HIP APIs to push more work to the GPU. So the deferred/pending action queue should have an associcated thread.</p> <p>For GPU waits, we can also leverage the same logic--using CPU signaling to unblock deferred GPU queue actions. Though this is performant, given that the CPU is involved for GPU internal synchronization. We want to use <code>hipEvent_t</code> instead:</p> <ul> <li>We keep track of all GPU signals in the timeline. 
Once we see a GPU wait   request, try to scan the timeline to find a GPU signal that advances the   timeline past the desired value, and use that for waiting instead. (This   actually applies to CPU waits too, and it's an optimization over pure   CPU side <code>iree_event_t</code> polling.)</li> <li>We may not see GPU signal before seeing GPU wait requests, then we can also   keep track of all GPU waits in the timeline. Later once see either a CPU   signal or GPU signal advancing past the waited value, we can handle them   accordingly--submitting immediately or associating the <code>hipEvent_t</code>.   This would also guarantee the requirement of <code>hipEvent_t</code>--recording should   happen before waiting.</li> <li>We can use the same <code>hipEvent_t</code> to unblock multiple GPU waits. That's allowed,   though it would mean we need to be careful regarding <code>hipEvent_t</code> lifetime   management. Here we can use reference counting to see how many timepoints   are using it and automatically return to a pool once done.</li> </ul> <p>Another problem is that per the <code>hipLaunchHostFunc()</code> doc, \"the function will be called after currently enqueued work and will block work added after it.\" We don't want the blocking behavior involving host. So we can use a dedicated <code>hipStream_t</code> for launching the host function, waiting on the <code>hipEvent_t</code> from the original stream too. We can also handle resource deallocation together there.</p>","tags":["GPU","HIP"]},{"location":"developers/design-docs/hip-hal-driver/#data-structures","title":"Data structures","text":"<p>To summarize, we need the following data structures to implement HAL semaphore:</p> <ul> <li><code>iree_event_t</code>: CPU notification mechanism wrapping low-level OS primitives.   
Used by host wait timepoints.</li> <li><code>iree_event_pool_t</code>: a pool for CPU <code>iree_event_t</code> objects to recycle.</li> <li><code>iree_hal_hip_event_t</code>: GPU notification mechanism wrapping a <code>hipEvent_t</code> and   a reference count. Used by device signal and wait timepoints. Associates with   a <code>iree_hal_hip_event_pool_t</code> pool--returns to the pool directly on once   reference count goes to 0.</li> <li><code>iree_hal_hip_event_pool_t</code>: a pool for GPU <code>iree_hal_hip_event_t</code> objects   to recycle.</li> <li><code>iree_hal_hip_timepoint_t</code>: an object that wraps a CPU <code>iree_event_t</code> or   GPU <code>iree_hal_hip_event_t</code> to represent wait/signal of a timepoint on a   timeline.</li> <li><code>iree_hal_hip_timepoint_pool_t</code>: a pool for <code>iree_hal_hip_timepoint_t</code>   objects to recycle. This pool builds upon the CPU and GPU event pool--it   acquires CPU/GPU event objects there.</li> <li><code>iree_hal_hip_timeline_semaphore_t</code>: contains a list of CPU wait and GPU   wait/signal timepoints.</li> <li><code>iree_hal_hip_queue_action_t</code>: a pending queue action (kernel launch or   stream-ordered allocation).</li> <li><code>iree_hal_hip_pending_queue_actions_t</code>: a data structure to manage pending   queue actions. It provides APIs to enqueue actions, and advance the queue on   demand--queue actions are released to the GPU when all their wait semaphores   are signaled past the desired value, or we can have a <code>hipEvent_t</code> object already   recorded to some <code>hipStream_t</code> to wait on.</li> </ul>","tags":["GPU","HIP"]},{"location":"developers/design-docs/invocation-execution-model/","title":"Invocation execution model","text":"<p>Authored June, 2022</p> <p>This documents the behavior of the user-visible invocation mechanism IREE uses to schedule program execution. 
Internally IREE uses a very similar modeling for tracking its internal workloads and in kind carries that down to target APIs and devices that themselves use a very similar model. The intent is to expose the device model in an abstracted way that allows for the full capture and communication of the execution intent to be propagated to the hardware that executes it. Though here we focus on the user-visible portion of execution there is really only one \"IREE execution model\" and the entire stack follows the same design. At its core this design is just an instantiation of an out-of-order execution algorithm such as those originating from the 1960's.</p>"},{"location":"developers/design-docs/invocation-execution-model/#glossary","title":"Glossary","text":"<pre><code>stateDiagram\n    state UserApplication {\n      direction BT\n      state Context0 {\n        ModuleA--&gt;ModuleAState0\n        ModuleB--&gt;ModuleBState0\n      }\n      state Context1 {\n        ModuleA--&gt;ModuleAState1\n        ModuleB--&gt;ModuleBState1\n        ModuleC--&gt;ModuleCState1\n      }\n      state ModuleA {\n        @func1\n        @func2\n      }\n      state ModuleB {\n        @func3\n        @func4\n      }\n      state ModuleC {\n        @func5\n      }\n    }</code></pre>"},{"location":"developers/design-docs/invocation-execution-model/#program","title":"Program","text":"<p>An IREE program is a collection of modules instantiated in a context from which invocations can be made. Invocations are ordered on a user-controlled timeline that uses fences to define the execution order requirements to enable out-of-order execution. 
A hosting user application may have multiple programs or multiple instances of the same program available and running invocations at a time across multiple timelines.</p>"},{"location":"developers/design-docs/invocation-execution-model/#module","title":"Module","text":"<p>Modules define executable code and data that can be loaded, linked, and run \u00e0 la ELF shared libraries. Modules may be implemented as C/C++, generated bytecode or C sources from the IREE compiler, or any other mechanism that can run code and implement the <code>iree_vm_module_t</code> interface. Modules on their own are read-only and can be reused across many contexts.</p> <p>Traditional ML runtimes would use a model (graph, etc) as their module representation. In IREE everything is a module including runtime subsystems like the HAL and user-provided custom code. This ensures that anything IREE can do can be externalized and replaced by users without needing to modify the core IREE code.</p>"},{"location":"developers/design-docs/invocation-execution-model/#context","title":"Context","text":"<p>A collection of modules are linked and instantiated in a context. Each context operates independently and carries its own copies of mutable module state. Invocations execute within a context scope and hosting applications coordinate across contexts as required. Contexts are cheap to create (microseconds) and retain (~100B + program state) such that users can decide how to manage them based on their scenario.</p> <p>Traditional ML runtimes would call these \"sessions\" but in IREE everything is a program. Whether the program is stateful or stateless and how the program is invoked is up to the program author.</p>"},{"location":"developers/design-docs/invocation-execution-model/#invocation","title":"Invocation","text":"<p>An invocation represents a single call into a module exported function using the program state stored in a context. 
Users can decide whether to perform synchronous blocking invocations or asynchronous non-blocking invocations per-call; the behavior of the invocation is independent from the target function and a user program may contain a mix of both.</p> <p>As an example a user program may synchronously invoke a <code>@query_output_shapes</code> function to preallocate storage for an asynchronous <code>@execute_in_place</code> function to write into.</p>"},{"location":"developers/design-docs/invocation-execution-model/#timeline","title":"Timeline","text":"<p>A timeline represents the observable order of execution. Users define their own timelines and communicate them to IREE via fences. Timelines do not match up with the order of invocations unless the user dictates they must by way of fences. In the absence of fences all invocations execute in an arbitrary order and they may execute concurrently just as threads in C with no barriers.</p> <p>Each timeline can be thought of as an independent clock domain that may operate asynchronously at its own frequency with only fences acting to tie separate timelines together. This directly mirrors real hardware constraints like clock domain crossing as each execution scope (thread on core, driver calls to queues, kernel queues to device queues, device queues to compute unit queues, etc) is naturally operating at different rates and well-designed systems must tolerate that variability.</p>"},{"location":"developers/design-docs/invocation-execution-model/#fence","title":"Fence","text":"<p>A fence is a specific point of progress in one or more timelines acting as a barrier, fork, or join point. Fences only guard execution ordering and not any particular resources though users can use them to guard resources by defining when in time the resources are available for use.</p> <p>Waits on fences are wait-until operations specifying that the timeline must reach  at least a specific point. 
This allows for flexible reordering and deferral of execution as executors can pull forward scheduled work based on policy (run similar work together, etc).</p>"},{"location":"developers/design-docs/invocation-execution-model/#hardware-abstraction-layer-hal","title":"Hardware Abstraction Layer (HAL)","text":"<p>The HAL is an optional feature of IREE that is used to provide a consistent interface across execution resources. It is used internally by IREE programs to define and submit work to devices and signal across them but may also be used by users to directly interface with hardware in a compatible way. Exposing the HAL API allows for users to efficiently manage their data and custom execution without expensive marshaling. Most users will only interact with HAL buffers as they work with their data but more advanced integrations can directly insert IREE into existing device contexts to transparently share scheduling and resources or insert their own code into IREE to pipeline custom execution.</p>"},{"location":"developers/design-docs/invocation-execution-model/#execution-by-timelines","title":"Execution by Timelines","text":"<p>NOTE: this defines an execution scheme that IREE supports but a user may use one or more such schemes in a single program - just as a C application may mix single- and multi-threaded code within itself for different components.</p> <p>The combination of invocations, timelines, and fences allows users to provide future knowledge to lower layers of the system by declaring their availability requirements and the lower layers are then able to execute the work out-of-order so long as the specified requirements are met. The primary goal when designing for such a system is to specify as few requirements as possible in order to provide the maximum amount of scheduling freedom to the implementation.</p> <p>This makes timelines one of the most critical components of the interface. 
The purpose of invocations is to schedule work against one or more timelines and what happens within the invocations is an implementation detail of the program.</p>"},{"location":"developers/design-docs/invocation-execution-model/#sequential-execution","title":"Sequential Execution","text":"<p>Here we say \"a user invokes a function to schedule execution on a timeline\" vs. a more traditional \"a user invokes a function to execute work\" and this manifests in the IREE ABI as invocations taking fences defining specific points on timelines of which the user may observe:</p> <pre><code># Fences are effectively just timeline + integer tuples and are cheap to hold.\nwait_fence = my_timeline.at(t)\nsignal_fence = my_timeline.at(t+1)\n# Schedule work against the timeline.\n# All work prior to t must complete before execution can occur and after\n# execution the timeline will advance to t+1.\nasync_invoke(@some_fn, wait_fence, signal_fence)\n# The invocation may have returned immediately after the work was scheduled;\n# until the fence is reached no actual execution may have occurred. To\n# synchronize the user code with the timeline the user can block until the fence\n# is reached.\nsignal_fence.wait()\n</code></pre> <p>To the user this would appear as:</p> <pre><code>sequenceDiagram\n    User-&gt;&gt;@some_func: invoke\n    activate @some_func\n    @some_func-&gt;&gt;User: ;\n    @some_func--&gt;&gt;@some_func: wait t\n    @some_func--&gt;&gt;User: signal t+1\n    deactivate @some_func</code></pre> <p>This means from the user's perspective the actual operations performed by the invocation are not important: the only thing the user can observe in this situation is when the timeline reaches <code>t+1</code> as they specified. Whether internally the invocation needs many steps to complete as there are timelines internal to the program is an implementation detail. 
Actual execution may look like this:</p> <pre><code>sequenceDiagram\n    User-&gt;&gt;@some_func: invoke\n    activate @some_func\n    @some_func-&gt;&gt;User:  ;\n    @some_func-&gt;&gt;@some_func: ;\n    @some_func--&gt;&gt;Device A: ;\n    Device A--&gt;&gt;Device A: wait t\n    activate Device A\n    @some_func-&gt;&gt;@some_func: ;\n    @some_func--&gt;&gt;Device B: ;\n    activate Device B\n    @some_func-&gt;&gt;@some_func: ;\n    Device A--&gt;&gt;@some_func: ;\n    deactivate Device A\n    @some_func-&gt;&gt;@some_func: ;\n    @some_func--&gt;&gt;Device B: ;\n    activate Device B\n    deactivate @some_func\n    Device B--&gt;&gt;User: signal t+1\n    deactivate Device B\n    deactivate Device B</code></pre> <p>Even in this simple user-synchronous example the system is able to internally run several concurrent timelines with a minimal number of synchronization points and the lowest possible latency as the user is immediately notified without any intermediate layers needing to be woken, scheduled, executed, and passed on.</p>"},{"location":"developers/design-docs/invocation-execution-model/#pipelined-execution","title":"Pipelined Execution","text":"<p>The true power of timelines comes from the ability to pipeline execution. 
Users define DAGs with fences and can construct arbitrarily complex execution topologies whether from the same program or across multiple programs:</p> <pre><code>stateDiagram\n    direction LR\n    state fence0 &lt;&lt;fork&gt;&gt;\n    [*] --&gt; fence0\n    fence0 --&gt; @fn0\n    state fence1 &lt;&lt;fork&gt;&gt;\n    @fn0 --&gt; fence1\n    fence1 --&gt; @fn1\n    fence1 --&gt; @fn2\n    state fence2 &lt;&lt;join&gt;&gt;\n    @fn1 --&gt; fence2\n    @fn2 --&gt; fence2\n    @fn3 --&gt; fence2\n    fence0 --&gt; @fn4\n    @fn4 --&gt; fence2\n    fence2 --&gt; [*]</code></pre> <p>This is a simple extension to the synchronous example using the same primitives:</p> <pre><code># Timeline is defined by the user.\nfence_a = my_timeline.at(t)\nfence_b = my_timeline.at(t+1)\nfence_c = my_timeline.at(t+2)\n# Invocations are launched using the fences and may not complete immediately.\nasync_invoke(@fn0, fence_a, fence_b)\nasync_invoke(@fn1, fence_b, fence_c)\nasync_invoke(@fn2, fence_b, fence_c)\nasync_invoke(@fn3, None, fence_c)\nasync_invoke(@fn4, fence_a, fence_c)\n# Blocking here but no need to; could pass fence_c on to other invocations.\nfence_c.wait()\n</code></pre> <p>The critical point of this being that the user never had to wait for any particular invocation to complete before being able to schedule more work against the timeline, even if those invocations could themselves not complete synchronously. The lower layers of the system are able to fully model the execution as early as possible without needing to communicate (and importantly synchronize) with the user.</p>"},{"location":"developers/design-docs/invocation-execution-model/#io","title":"I/O","text":"<p>Users define the semantics of their programs themselves. For example if the user knows the precise shape of an output buffer they can preallocate the buffer and pass it in. 
If they don't know they can decide to factor out the shape calculation and invoke that synchronously in order to compute the shape, allocate the appropriately sized buffer, and pass that in. Or they could decide to only deal with synchronous invocations and return a program-allocated buffer view with the appropriate shape in their callback. IREE does not dictate the design of user programs and as such enables mixed stateful/stateless, asynchronous/synchronous, and arbitrary scheduling models (enqueue/drain, windowing, etc).</p> <p>Inputs and outputs to invocations are provided by the user as primitive values (integers, floats, etc), supported builtin types (lists, byte buffers/strings), custom user types, and HAL types like buffers or buffer views (buffers + shape and type metadata). One or more wait fences can be used to order invocation access to one or more inputs by indicating that the resource is not available until a certain fence is reached. Similarly one or more signal fences can be used to order subsequent access to the resources by indicating the advancement of the timeline when they are available.</p> <pre><code># wait_fence_a must be reached before buffer_a and buffer_b can be read.\n# wait_fence_b must be reached before buffer_c can be read.\n# buffer_a will be ready to read when signal_fence_a has been reached.\nasync_invoke(@fn,\n             (wait_fence_a, buffer_a, buffer_b),\n             42,  # no ordering required on value types\n             (wait_fence_b, buffer_c),\n             (signal_fence_a, buffer_a))\n</code></pre> <p>The above example demonstrates an in-place operation on <code>buffer_a</code>. 
It's also possible for invocations to return values:</p> <pre><code>result = invoke(@sum, 1, 2)  # = 3\n</code></pre> <p>When executed asynchronously a callback or any construct that can be built upon them (like promises/futures) can receive the results:</p> <pre><code>def my_callback(result):\n  print(result)  # 3\nasync_invoke(@sum, 1, 2, my_callback)\n</code></pre>"},{"location":"developers/design-docs/invocation-execution-model/#stream-ordered-allocations","title":"Stream-ordered Allocations","text":"<p>Invocations generally have only a few KB of overhead and pipelined command buffers take only a small amount more. Storage buffers, however, can easily take hundreds of MB per invocation for I/O and transient state. This compounds as program usage becomes more complex or multiple programs are involved. IREE supports traditional host-ordered allocations (\u00e0 la malloc/free) for persistent buffers like large constants/read-only data or user-managed ringbuffers. Stream-ordered allocations are also supported to allow for pooled buffer reservations that can be allocated in a scheduled order alongside program execution.</p> <p>For more detailed examples see the CUDA blog posts describing their implementation: part 1, part 2.</p> <p>With stream-ordered allocations each allocation and deallocation operation is scheduled with wait and signal fences just as with invocations. This allows these allocation operations to execute remotely on device without host program involvement. For example, scheduling <code>alloca0</code>/<code>dealloca0</code> and <code>alloca1</code>/<code>dealloca1</code> interleaved with the function execution allows for the transient memory required for executing <code>@fn0</code> to remain uncommitted until immediately before it is executed, committed during execution, and then decommitted immediately after execution. 
The memory required for passing data from <code>@fn0</code> to the subsequent <code>@fn1</code> and <code>@fn2</code> survives until after they have completed executing before being decommitted. By using the same scheduling primitives as execution the allocation topology can be as arbitrarily complex as the invocation topology:</p> <pre><code>stateDiagram\n    direction LR\n    state fence0a &lt;&lt;fork&gt;&gt;\n    [*] --&gt; fence0a\n    state fence0b &lt;&lt;fork&gt;&gt;\n    fence0a --&gt; alloca0\n    fence0a --&gt; alloca1\n    alloca0 --&gt; fence0b\n    alloca1 --&gt; fence0b\n    fence0b --&gt; @fn0\n    state fence1a &lt;&lt;fork&gt;&gt;\n    @fn0 --&gt; fence1a\n    state fence1b &lt;&lt;fork&gt;&gt;\n    fence1a --&gt; dealloc0\n    dealloc0 --&gt; fence1b\n    fence1b --&gt; @fn1\n    fence1b --&gt; @fn2\n    state fence2a &lt;&lt;join&gt;&gt;\n    @fn1 --&gt; fence2a\n    @fn2 --&gt; fence2a\n    state fence2b\n    fence2a --&gt; dealloc1\n    state fence2b &lt;&lt;join&gt;&gt;\n    dealloc1 --&gt; fence2b\n    fence2b --&gt; [*]</code></pre> <p>When operating in this way allocations from the host-perspective are just reservations for a slice of pooled storage that will be committed at some point in the future. Likewise deallocations from the host-perspective release the prior reservation and schedule the paired decommit at some point in the future. Scheduling N sequential invocations thus requires only enough committed storage for a single invocation in addition to the I/O (unless that too is stream-ordered).</p> <p>This scheduling behavior allows for both minimal peak memory consumption regardless of the number of programs or invocation pipeline depth and sharing of committed storage across programs: the memory consumption of a program at rest is near zero when stateless and the sum of all state when stateful. 
Target devices that natively support stream-ordered allocations (like CUDA) can even share pools across processes.</p> <p>The other provided feature in combination with the fence guaranteed forward progress is that so long as the memory pool can service a single request execution can still continue even when constrained. A device can serialize two independent invocations requiring 400MB of transient memory when the system only has 512MB available with no user-visible impact besides increased latency. This does require the user to ensure they schedule work that is possible to run or rely on the target system having paging in order to lighten the strictness of the pool quotas.</p> <p>Stream-ordered allocations performed by the user for invocation inputs can be declared as transferred to the program. This allows the program to eagerly deallocate or reuse the input storage while still preserving the internal scheduling requirements of the program.</p>"},{"location":"developers/design-docs/invocation-execution-model/#internal-state","title":"Internal State","text":"<p>A stateful program may contain internal timelines that it uses to order its own execution. Take for example this simple stateful program:</p> <pre><code>class TrivialKernel(Program):\n  _x0 = Program.export_global(x_type)\n  def get(self):\n    return self._x0\n  def set(self, x=x_type):\n    self._x0 = x\n  def matmul(self, x=y_type):\n    self._x0 = self._matmul(x, self._x0)\n  @Program.kernel\n  def _matmul(x, x0):\n    return jnp.matmul(x, x0)\n</code></pre> <p>Each invocation of <code>matmul</code> needs to be executed in-order with prior invocations as there is a data dependency established on <code>self._x0</code>. Attempts to <code>get</code> or <code>set</code> must also be sequenced correctly with the <code>matmul</code> invocations. 
A basic usage like this:</p> <pre><code>m = TrivialKernel()\nm.set(input)\nm.matmul(a)\nm.matmul(b)\nm.matmul(c)\noutput = m.get()\nprint(output)  # implicit wait\n</code></pre> <p>Would be executed as:</p> <pre><code>sequenceDiagram\n    activate User\n    User-&gt;&gt;TrivialKernel: @set(input)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    activate Device\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @matmul(a)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @matmul(b)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @matmul(c)\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    User-&gt;&gt;TrivialKernel: @get()\n    activate TrivialKernel\n    TrivialKernel--&gt;&gt;Device: ;\n    deactivate TrivialKernel\n    TrivialKernel-&gt;&gt;User: ;\n    Device--&gt;&gt;Device: ;\n    deactivate User\n    User-&gt;&gt;User: (wait)\n    Device--&gt;&gt;User: (signal)\n    deactivate Device\n    activate User\n    User-&gt;&gt;User: print(output)\n    deactivate User</code></pre> <p>Note that although the user provided no timeline of their own execution is still ordered correctly due to the internal timeline constructed by the program. 
If the user wanted to also pipeline execution with another program they could do so by providing their own fences.</p>"},{"location":"developers/design-docs/metal-hal-driver/","title":"Metal HAL driver","text":"<p>This document lists technical details regarding the Metal implemenation of IREE's Hardware Abstraction Layer, called a Metal HAL driver.</p> <p>IREE provides a Hardware Abstraction Layer (HAL) as a common interface to different compute accelerators. IREE HAL's design draws inspiration from modern GPU architecture and APIs; so implementing a HAL driver using modern GPU APIs is generally straightforward. This applies to the Metal HAL driver.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#overall-design-choices","title":"Overall Design Choices","text":"","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#metal-versions","title":"Metal Versions","text":"<p>Currently the Metal HAL driver expects Metal 3 capabilities. Metal 3 was released late 2022 and are supported since macOS Ventura and iOS 16. It covers recent Apple silicon GPUs including A13+ and M1+ chips and others.</p> <p>In the future, we expect to increase the support to cover Metal 2 capabilities. Metal 2 introduces useful features like argument buffer and others that are necessary for performance and make IREE HAL implementation simpler. Metal 2 was released late 2017 and are supported since macOS High Sierra and iOS 11. It is already dominant (macOS, iOS).</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#programming-languages-and-libraries","title":"Programming Languages and Libraries","text":"<p>The Metal framework only exposes Objective-C or Swift programming language APIs. Metal HAL driver needs to inherit from common HAL abstraction definitions, which are in C. To minimize dependency and binary size and increase performance, we use Metal's Objective-C API for implementing the Metal HAL driver. 
Header (<code>.h</code>) and implementation (<code>.m</code>) files are put adjacent to each other.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#object-lifetime-management","title":"Object Lifetime Management","text":"<p>Objective-C uses refcount for tracking object lifetime and managing memory. This is traditionally done manually by sending <code>retain</code> and <code>release</code> messages to Objective-C objects. Modern Objective-C allows developers to opt in to use Automatic Reference Counting to let the compiler to automatically deduce and insert <code>retain</code>/<code>release</code> where possible to simplify the burdern of manual management.</p> <p>We don't use ARC in the Metal HAL driver given that IREE has its own object refcount and lifetime management mechanism. Metal HAL GPU objects are tracked with that to be consistent with others. Each Metal HAL GPU object <code>retain</code>s the underlying Metal <code>id&lt;MTL*&gt;</code> object on construction and <code>release</code>s on destruction.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#gpu-objects","title":"GPU Objects","text":"<p>Metal is one of the main modern GPU APIs that provide more explicit control over the hardware. 
The mapping between IREE HAL classes and Metal protocols is relatively straightforward:
In the future we expect to spport multiple queues for better concurrency.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#command-buffer-submission","title":"Command buffer submission","text":"<p>In IREE HAL, command buffers are directly created from the <code>iree_hal_device_t</code>. It's also directly submitted there via <code>iree_hal_device_queue_execute()</code>. Each execution takes a batch of command buffers, together with a list of waiting <code>iree_hal_semaphore_t</code>s and a list signaling <code>iree_hal_semaphore_t</code>s. There is no direct mapping of such structure in Metal; so we performs the submission in three steps:</p> <ol> <li>Create a new <code>MTLCommandBuffer</code> to <code>encodeWaitForEvent:value</code> for all    waiting <code>iree_hal_semaphore_t</code>s and commit this command buffer.</li> <li>Commit all command buffers in the submmision batch.</li> <li>Create a new <code>MTLCommandBuffer</code> to <code>encodeSignalEvent:value</code> for all    signaling <code>iree_hal_semaphore_t</code>s and commit this command buffer.</li> </ol> <p>Such submission enables asynchronous execution of the workload on the GPU.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#queue-ordered-allocation","title":"Queue-ordered allocation","text":"<p>Queue-ordered asynchronous allocations via <code>iree_hal_device_queue_alloc</code> is not fully supported yet; it just translates to blocking wait and allocation.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#collectives","title":"Collectives","text":"<p>Collectives suppport is not yet implemented.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#profiling","title":"Profiling","text":"<p>The Metal HAL driver supports profiling via <code>MTLCaptureManager</code>. 
We can either capture to a trace file or XCode.</p> <p>To perform profiling in the command line, attach <code>--device_profiling_mode=queue --device_profiling_file=/path/to/metal.gputrace</code> to IREE binaries.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#command-buffer","title":"Command buffer","text":"<p>Command buffers are where IREE HAL and Metal API have a major difference.</p> <p>IREE HAL command buffers follow the flat Vulkan recording model, where all memory or dispatch commands are recorded into a command buffer directly. Unlike Vulkan, Metal adopts a multi-level command recording model--memory/dispatch commands are not directly recorded into a command buffer; rather, they must go through the additional level of blit/compute encoders. Implementing IREE's HAL using Metal would require switching encoders for interleaved memory and dispatch commands. Additionally, certain IREE HAL API features do not have direct mapping in Metal APIs, e.g., various forms of IREE HAL execution/memory barriers. Translating them would require looking at both previous and next commands to decide the proper mapping.</p> <p>Due to these reasons, it's beneficial to have a complete view of the full command buffer and extra flexibility during recording, in order to fixup past commands, or inspect future commands.</p> <p>Therefore, to implement IREE HAL command buffers using Metal, we perform two steps using a linked list of command segments: First we create segments to keep track of all IREE HAL commands and the associated data. And then, when finalizing the command buffer, we iterate through all the segments and record their contents into a proper <code>MTLCommandBuffer</code>. 
A linked list gives us the flexibility to organize the command sequence with low overhead; and a deferred recording gives us the complete picture of the command buffer when recording actually starts.</p> <p>The Metal HAL driver right now only supports one-shot command buffers, by mapping to <code>MTLCommandBuffer</code>s.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#fillcopyupdate-buffer","title":"Fill/copy/update buffer","text":"<p>Metal APIs for fill and copy buffers have alignment restrictions on the offset and length. <code>iree_hal_command_buffer_{fill|copy|update}_buffer()</code> is more flexible regarding that. So for cases that aren't directly supported by Metal APIs, we use polyfill compute kernels to perform the memory operation using GPU threads.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#semaphore","title":"Semaphore","text":"<p><code>iree_hal_semaphore_t</code> allows host-&gt;device, device-&gt;host, host-&gt;host, and device-&gt;device synchronization. It maps to Vulkan timeline semaphore. In Metal world, the counterpart would be <code>MTLSharedEvent</code>. Most of the <code>iree_hal_semaphore_t</code> APIs are simple to implement in <code>MetalSharedEvent</code>, with <code>iree_hal_semaphore_wait()</code> as an exception. A listener is registered on the <code>MTLSharedEvent</code> with <code>notifyListener:atValue:block:</code> to signal a semaphore to wake the current thread, which is put to sleep by waiting on the semaphore.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#allocator","title":"Allocator","text":"<p>At the moment the Metal HAL driver just has a very simple <code>iree_hal_allocator_t</code> implementation. It just wraps a <code>MTLDevice</code> and redirects all allocation requests to the <code>MTLDevice</code>. No page/pool/slab or whatever. 
This is meant to be used together with common allocator layers like the caching allocator.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#buffer","title":"Buffer","text":"<p>IREE <code>iree_hal_buffer_t</code> maps to Metal <code>MTLBuffer</code>. See Object Lifetime Management for more details.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#executable","title":"Executable","text":"<p>IREE <code>iree_hal_executable_t</code> represents a GPU program archive with a driver-defined format. It maps naturally to Metal <code>MTLLibrary</code>. An entry point in a <code>MTLLibrary</code> is a <code>MTLFunction</code>. We define <code>iree_hal_metal_kernel_params_t</code> to wrap around a <code>MTLLibrary</code>, its <code>MTLFunction</code>s, and also <code>MTLComputePipelineState</code> objects constructed from <code>MTLFunction</code>s.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#executable-cache","title":"Executable cache","text":"<p>IREE <code>iree_hal_executable_cache_t</code> models a cache of prepared GPU executables for a particular device. At the moment the Metal HAL driver does not perform any caching of GPU programs; it simply reads the program from the FlatBuffer and hands it over to the Metal driver.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#descriptor-set-pipeline-layout","title":"Descriptor set / pipeline layout","text":"<p>See Resource descriptors for more details.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#compute-pipeline","title":"Compute Pipeline","text":"","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#shaderkernel-compilation","title":"Shader/kernel compilation","text":"<p>Metal has Metal Shading Language (MSL) for authoring graphics shaders and compute kernels. 
MSL source code can be directly consumed by the Metal framework at run-time; it can also be compiled first into an opaque library using command-line tools at build-time.</p> <p>IREE uses compilers to compile ML models expressed with high-level op semantics down to GPU native source format. This is also the case for the Metal HAL driver. Metal does not provide an open intermediate language; we reuse the SPIR-V code generation pipeline and then cross compile the generated SPIR-V into MSL source with SPIRV-Cross. This is actually a fairly common practice for targeting multiple GPU APIs in the graphics programming world. For example, the Vulkan implementation on macOS/iOS, MoltenVK, is also doing the same for shaders/kernels. The path is quite robust, as demonstrated by various games on top of MoltenVK.</p> <p>Therefore, in IREE, we have a <code>MetalSPIRVTargetBackend</code>, which pulls in the common SPIR-V passes to form the compilation pipeline. The difference would be to provide a suitable SPIR-V target environment to drive the compilation, which one can derive from the Metal GPU families to target. The serialization step differs from <code>VulkanSPIRVTargetBackend</code> too: following the normal SPIR-V serialization step, we additionally need to invoke SPIRV-Cross to cross compile the generated SPIR-V into MSL, and then compile and/or serialize the MSL source/library.</p> <p>IREE uses FlatBuffer to encode the whole workload module, including both GPU shader/kernel (called executable in IREE terminology) and CPU scheduling logic. The GPU executables are embedded as part of the module's FlatBuffer, which are <code>mmap</code>ped when IREE runs.</p> <p>For the Metal HAL driver, it means we need to embed the MSL kernels inside the module FlatBuffer. 
Right now we can either encode the MSL source strings and compile them at Metal run-time, or directly encode the library instead.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#workgroupthreadgroup-size","title":"Workgroup/threadgroup size","text":"<p>When dispatching a compute kernel in Metal, we need to specify the number of thread groups in the grid and the number of threads in each thread group. Both are 3-D vectors. IREE HAL, which follows Vulkan, calls them workgroup count and workgroup size, respectively.</p> <p>In the Vulkan programming model, workgroup count and workgroup size are specified at different places: the former is given when invoking <code>vkCmdDispatch()</code>, while the latter is encoded in the dispatched SPIR-V code. This split does not match the Metal model, where we specify both in the API with <code>dispatchThreads:threadsPerThreadgroup:</code>.</p> <p>As mentioned in shader/kernel compilation, MSL kernels are cross compiled from SPIR-V code and then embedded in the module FlatBuffer. The module FlatBuffer provides us a way to convey the threadgroup/workgroup size information extracted from the SPIR-V code. We encode an additional 3-D vector for each entry point and use it as the threadgroup size when later dispatching the <code>MTLFunction</code> corresponding to the entry point.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#resource-descriptors","title":"Resource descriptors","text":"<p>A descriptor is an opaque handle pointing to a resource that is accessed in the compute kernel. IREE's HAL models several concepts related to GPU resource management explicitly:</p> <ul> <li><code>iree_hal_descriptor_set_layout_t</code>: a schema for   describing an array of descriptor bindings. 
Each descriptor binding specifies   the resource type, access mode and other information.</li> <li><code>iree_hal_pipeline_layout_t</code>: a schema for describing all   the resources accessed by a compute pipeline. It includes zero or more   <code>DescriptorSetLayout</code>s and (optional) push constants.</li> </ul> <p>However, this isn't totally matching Metal's paradigm. In the Metal framework, the closest concept to descriptor sets would be argument buffer. There is no direct correspondence to descriptor set layout and pipeline layout. Rather, the layout is implicitly encoded in Metal shaders as MSL structs. The APIs for creating argument buffers do not encourage early creation without pipelines: one typically creates them for each <code>MTLFunction</code>.</p> <p>All of this means it's better to defer the creation of the argument buffer until the point of compute pipeline creation and dispatch. Therefore, the Metal HAL driver's <code>iree_hal_metal_descriptor_set_layout_t</code> and <code>iree_hal_metal_pipeline_layout_t</code> are just containers holding the information up for recording command buffer dispatch.</p>","tags":["GPU","Metal"]},{"location":"developers/design-docs/metal-hal-driver/#command-buffer-dispatch","title":"Command buffer dispatch","text":"<p>Metal HAL driver command buffer dispatch recording performs the following steps with the current active <code>MTLComputeCommandEncoder</code>:</p> <ol> <li>Bind the <code>MTLComputePipelineState</code> for the current entry function.</li> <li>Encode the push constants using <code>setBytes:length:atIndex</code>.</li> <li>For each bound descriptor set at set #<code>S</code>:</li> <li>Create a <code>MTLArgumentEncoder</code> for encoding an       associated argument <code>MTLBuffer</code>.</li> <li>For each bound resource buffer at binding #<code>B</code> in this descriptor set,       encode it to the argument buffer index #<code>B</code> with       <code>setBuffer::offset::atIndex:</code> and inform the 
<code>MTLComputeCommandEncoder</code>       that the dispatch will use this resource with <code>useResource:usage:</code>.</li> <li>Set the argument <code>MTLBuffer</code> to buffer index #<code>S</code>.</li> <li>Dispatch with <code>dispatchThreadgroups:threadsPerThreadgroup:</code>.</li> </ol>","tags":["GPU","Metal"]},{"location":"developers/general/contributing/","title":"Contributing to IREE","text":"<p>We'd love to accept your patches and contributions to this project.</p> <p>Note - coordinating efforts</p> <p>Please file issues or reach out on any of our other communication channels before doing substantial work; this will ensure that others don't duplicate the work and that there's a chance to discuss any design issues.</p>"},{"location":"developers/general/contributing/#developer-policies","title":"Developer policies","text":""},{"location":"developers/general/contributing/#code-of-conduct","title":"Code of conduct","text":"<p>This project follows the OpenXLA Code of Conduct.</p>"},{"location":"developers/general/contributing/#developer-certificate-of-origin","title":"Developer Certificate of Origin","text":"<p>Contributors must certify that they wrote or otherwise have the right to submit the code they are contributing to the project.</p> Expand to read the full DCO agreement text <p>By making a contribution to this project, I certify that:</p> <ol> <li> <p>The contribution was created in whole or in part by me and I have the   right to submit it under the open source license indicated in the file; or</p> </li> <li> <p>The contribution is based upon previous work that, to the best of my   knowledge, is covered under an appropriate open source license and I have   the right under that license to submit that work with modifications, whether   created in whole or in part by me, under the same open source license   (unless I am permitted to submit under a different license), as indicated   in the file; or</p> </li> <li> <p>The contribution was provided 
directly to me by some other person who   certified 1., 2. or 3. and I have not modified it.</p> </li> <li> <p>I understand and agree that this project and the contribution are public   and that a record of the contribution (including all personal information   I submit with it, including my sign-off) is maintained indefinitely and   may be redistributed consistent with this project or the open source   license(s) involved.</p> </li> </ol> <p>Signing is enforced by the DCO GitHub App. This requires that all commits included in pull requests include a <code>Signed-off-by</code> line:</p> <pre><code>This is my commit message\n\nSigned-off-by: Random J Developer &lt;random@developer.example.org&gt;\n</code></pre> <ul> <li> <p>Git will automatically append this message if you use the <code>-s</code> option:</p> <pre><code>git commit -s -m 'This is my commit message'\n</code></pre> </li> <li> <p>Users of Visual Studio Code can add   <code>\"git.alwaysSignOff\": true,</code> in their settings.</p> </li> <li> <p>For more information about DCO enforcement and git workflows, see the   dcoapp/app repository.</p> </li> </ul>"},{"location":"developers/general/contributing/#contributor-license-agreement","title":"Contributor License Agreement","text":"<p>CLA is being replaced with DCO. Both are enabled while we migrate.</p> <p>Contributions to this project must be accompanied by a Contributor License Agreement (CLA). 
Head over to https://cla.developers.google.com/ to see your current agreements on file or to sign a new one.</p> <ul> <li>You (or your employer) retain the copyright to your contribution; this simply   gives us permission to use and redistribute your contributions as part of the   project.</li> <li>You generally only need to submit a CLA once, so if you've already submitted   one (even if it was for a different project), you probably don't need to do it   again.</li> </ul>"},{"location":"developers/general/contributing/#authors-codeowners-and-maintainers","title":"AUTHORS, CODEOWNERS, and MAINTAINERS","text":"<p>The <code>AUTHORS</code> file keeps track of those who have made significant contributions to the project.</p> <ul> <li>If you would like additional recognition for your contributions, you may add   yourself or your organization (please add the entity who owns the copyright   for your contributions).</li> <li>The source control history remains the most accurate source for individual   contributions.</li> </ul> <p>The <code>.github/CODEOWNERS</code> file lets maintainers opt in to PR reviews modifying certain paths.</p> <ul> <li>Review is not required from a code owner, though it is recommended.</li> </ul> <p>The <code>MAINTAINERS.md</code> file documents official maintainers for project components.</p>"},{"location":"developers/general/contributing/#coding-policies","title":"Coding policies","text":""},{"location":"developers/general/contributing/#coding-style-guidelines","title":"Coding style guidelines","text":"<p>Most of the code style is derived from the Google Style Guides for the appropriate language and is generally not something we accept changes on (as clang-format and other linters set that for us). The C++ compiler portion of the project follows the MLIR/LLVM style guide.</p> <p>Improvements to code structure and clarity are welcome but please file issues to track such work first. 
Pure style changes are unlikely to be accepted unless they are applied consistently across the project.</p> Tip - code formatters and lint scripts <p>Formatters like <code>clang-format</code> (C/C++) and Black (Python) can be set to run automatically in your editor of choice.</p> <p>The script at <code>build_tools/scripts/lint.sh</code> can also be used to run the full suite of lint checks.</p>"},{"location":"developers/general/contributing/#testing-policy","title":"Testing policy","text":"<p>With few exceptions, features should be accompanied by automated tests.</p> <p>We use a mix of in-tree and out-of-tree unit and integration tests. For more information about the types of tests used across the project, refer to the testing guide.</p>"},{"location":"developers/general/contributing/#github-policies","title":"GitHub policies","text":""},{"location":"developers/general/contributing/#code-reviews","title":"Code reviews","text":"<p>All submissions, including submissions by maintainers, require review. We use GitHub pull requests (PRs) for this purpose. Consult GitHub Help for more information on using pull requests.</p> <ul> <li>Please keep PRs small (focused on a single issue) to make reviews and later   culprit-finding easier.</li> <li>You may see trusted core contributors bending this rule for project   maintenance and major subsystem renovation. If you feel like the rules aren't   working for a certain situation, please ask as we bias towards pragmatism for   cases that require it.</li> </ul>"},{"location":"developers/general/contributing/#github-actions-workflows","title":"GitHub Actions workflows","text":"<p>We use GitHub Actions to automatically build and test various parts of the project.</p> <ul> <li>Most presubmit workflows will only run automatically on PRs if you are a   project collaborator. Otherwise a maintainer must   approve workflow runs.   
If you are sending code changes to the project, please   request commit access, so that these can run   automatically.</li> <li>It is generally expected that PRs will only be merged when all checks are   passing. In some cases, pre-existing failures may be bypassed by a maintainer.</li> </ul> Tip - adjusting workflow behavior <p>Some workflows only run on commits after they are merged. See the CI behavior manipulation section below to learn how to customize this behavior.</p>"},{"location":"developers/general/contributing/#merging-approved-changes","title":"Merging approved changes","text":"<p>After review and presubmit checks, PRs should typically be merged using \"squash and merge\".</p> <ul> <li>The squashed commit summary should match the PR title and the commit   description should match the PR body (this is the default behavior).   Accordingly, please write these as you would a helpful commit message.</li> </ul> <p>It is assumed that the PR author will merge their change unless they ask someone else to merge it for them (e.g. 
because they don't have write access yet).</p>"},{"location":"developers/general/contributing/#obtaining-commit-access","title":"Obtaining commit access","text":"<p>Access to affiliated repositories is divided into tiers:</p> Tier Description Team link Triage New project members should typically start here Can be assigned issues Can apply labels to issues / PRs Can run workflows without approval iree-triage Write Established project contributors should request this access Can merge approved pull requests Can create branches iree-write Maintain/Admin  Can edit repository settings Can push to protected branches Added case-by-case <p>All access tiers first require joining the iree-org GitHub organization.</p> <p>Fill out this form to request access </p> <p>Once you are a member of the iree-org GitHub organization, you can request to join any of the teams on https://github.com/orgs/iree-org/teams.</p> <p>Note: other GitHub organizations</p> <p>Work on IREE sometimes spans other GitHub organizations like shark-infra. Reach out to a project member if you would also like access to repositories in those organizations.</p>"},{"location":"developers/general/contributing/#branch-naming","title":"Branch naming","text":"<p>Most work should be done on repository forks. 
For developers with write access, when creating a branch in the common iree-org/iree repository, please follow these naming guidelines:</p> Branch type Naming scheme Example Single user <code>users/[username]/*</code> <code>users/cooldeveloper/my-awesome-feature</code> Shared feature branch <code>shared/*</code> <code>shared/pytorch-performance-sprint</code> Dependency updates <code>integrates/*</code> <code>integrates/integrate-llvm-20240501</code> <p>Branches that do not meet these guidelines may be deleted, especially if they appear to be stale.</p>"},{"location":"developers/general/contributing/#tips-for-contributors","title":"Tips for contributors","text":""},{"location":"developers/general/contributing/#tool-recommendations","title":"Tool recommendations","text":"Program or tool Description  Visual Studio Code (VSCode) The most commonly used editor amongst IREE developers  Ccache A fast C/C++ compiler cache. See the CMake with <code>ccache</code> page  GitHub CLI A CLI for interacting with GitHub  \"Refined GitHub\" browser extensions Extension that add features to the GitHub UI"},{"location":"developers/general/contributing/#build-systems","title":"Build systems","text":"<p>IREE supports building from source with both Bazel and CMake.</p> <ul> <li>CMake is the preferred build system and offers the most flexible   configuration options</li> <li>Bazel is a stricter build system and helps with usage in Google's downstream   source repository</li> <li>Certain dependencies (think large/complex projects like CUDA, TensorFlow,   PyTorch, etc.) may be difficult to support with one build system or the   other, so the project may configure these as optional</li> </ul>"},{"location":"developers/general/contributing/#continuous-integration-ci","title":"Continuous integration (CI)","text":"<p>IREE uses GitHub Actions for CI. 
The primary CI is configured in the ci.yml workflow file.</p>"},{"location":"developers/general/contributing/#self-hosted-runners","title":"Self-hosted runners","text":"<p>In addition to the default runners GitHub provides, IREE uses self-hosted runners to run many of its workflow jobs. These enable access to additional compute and custom configurations such as accelerators. Configuration scripting is checked in to this repository (see the README for that directory).</p>"},{"location":"developers/general/contributing/#ci-behavior-manipulation","title":"CI behavior manipulation","text":"<p>The setup step of the CI determines which CI jobs to run. This is controlled by the configure_ci.py script. It will generally run a pre-determined set of jobs on presubmit with some jobs kept as post-submit only. If changes are only to a certain set of excluded files that we know don't affect CI (e.g. Markdown files), then it will skip the jobs.</p> <p>You can customize which jobs run using git trailers in the PR description.</p> <p>The available options are</p> <pre><code>ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\nskip-ci: free form reason\nskip-llvm-integrate-benchmark: free form reason\nbenchmark-extra: extra,benchmarks,to,run\nrunner-env: [testing|prod]\n</code></pre> Using <code>skip-ci</code> <p><code>skip-ci</code> skips all jobs. It is mutually exclusive with the other <code>ci-*</code> options and is synonymous with <code>ci-skip: all</code>.</p> <pre><code>skip-ci: free form reason\n</code></pre> Using <code>ci-skip</code>, <code>ci-extra</code>, <code>ci-exactly</code> <p>The <code>ci-*</code> options instruct the setup script on which jobs to include or exclude from its run. 
They take a comma-separated list of jobs which must be from the set of top-level job identifiers in the <code>ci.yml</code> file or the special keyword \"all\" to indicate all jobs.</p> <pre><code>ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\n</code></pre> <ul> <li><code>ci-skip</code> removes jobs that would otherwise be included, though it is not an error to list jobs that would not be included by default.</li> <li><code>ci-extra</code> adds additional jobs that would not have otherwise been run, though it is not an error to list jobs that would have been included anyway. It is an error to list a job in both \"skip\" and \"extra\".</li> <li><code>ci-exactly</code> provides an exact list of jobs that should run. It is mutually exclusive with both \"skip\" and \"extra\".</li> </ul> <p>In all these cases, the setup does not make any effort to ensure that job dependencies are satisfied. Thus, if you request skipping the <code>build_all</code> job, all the jobs that depend on it will fail, not be skipped.</p> Using <code>benchmark-extra</code>, <code>skip-llvm-integrate-benchmark</code> <pre><code>benchmark-extra: extra,benchmarks,to,run\nskip-llvm-integrate-benchmark: free form reason\n</code></pre> <p>Benchmarks don't run by default on PRs, and must be specifically requested.</p> <p>The <code>benchmark-extra</code> option allows specifying additional benchmark presets to run as part of benchmarking. It accepts a comma-separated list of benchmark presets. This combines with labels added to the PR (which are a more limited set of options). See the benchmark suites documentation.</p> <p>Benchmarks do run by default on PRs detected to be an integration of LLVM into IREE, but this behavior can be disabled with <code>skip-llvm-integrate-benchmark</code>.</p> Using <code>runner-env</code> <p>The <code>runner-env</code> option controls which runner environment to target for our self-hosted runners. 
We maintain a test environment to allow testing out new configurations prior to rolling them out. This trailer is for advanced users who are working on the CI infrastructure itself.</p> <pre><code>runner-env: [testing|prod]\n</code></pre>"},{"location":"developers/general/contributing/#ci-configuration-recipes","title":"CI configuration recipes","text":"<p>Copy/paste any of these at the bottom of a PR description to change what the CI runs.</p> <ul> <li> <p>Also run Windows and macOS builds that are normally post-merge only:</p> <pre><code>ci-extra: build_test_all_windows,build_test_all_macos_arm64,build_test_all_macos_x86_64\n</code></pre> </li> <li> <p>Also run GPU tests on NVIDIA A100 runners (opt-in due to low availability):</p> <pre><code>ci-extra: test_nvidia_a100\n</code></pre> </li> <li> <p>Skip all CI builds and tests, e.g. for comment-only changes:</p> <pre><code>skip-ci: Comment-only change.\n</code></pre> </li> <li> <p>Only run Bazel builds, e.g. for changes only affecting Bazel rules:</p> <pre><code>ci-exactly: build_test_all_bazel\n</code></pre> </li> </ul> <p>For example, this PR opted in to running the <code>build_test_all_windows</code> job:</p> <p></p> <p>The enabled jobs can be viewed from the Summary page of an action run:</p> <p></p>"},{"location":"developers/general/developer-overview/","title":"Developer overview","text":"<p>This guide provides an overview of IREE's project structure and main tools for developers.</p>"},{"location":"developers/general/developer-overview/#project-code-layout","title":"Project code layout","text":"<ul> <li>/compiler/:   MLIR dialects, LLVM compiler passes, module translation code, etc.<ul> <li>bindings/: Python and other language bindings</li> </ul> </li> <li>/runtime/:   Standalone runtime code including the VM and HAL drivers<ul> <li>bindings/: Python and other language bindings</li> </ul> </li> <li>/integrations/:   Integrations between IREE and other frameworks, such as TensorFlow</li> <li>/tests/:   Tests 
for full compiler-&gt;runtime workflows</li> <li>/tools/:   Developer tools (<code>iree-compile</code>, <code>iree-run-module</code>, etc.)</li> <li>/samples/: Also see the   separate https://github.com/iree-org/iree-experimental repository</li> </ul>"},{"location":"developers/general/developer-overview/#iree-compiler-code-layout","title":"IREE compiler code layout","text":"<ul> <li>API/:   Public C API</li> <li>Codegen/:   Code generation for compute kernels</li> <li>Dialect/:   MLIR dialects (<code>Flow</code>, <code>HAL</code>, <code>Stream</code>, <code>VM</code>, etc.)</li> <li>InputConversion/:   Conversions from input dialects and preprocessing</li> </ul>"},{"location":"developers/general/developer-overview/#iree-runtime-code-layout","title":"IREE runtime code layout","text":"<ul> <li>base/:   Common types and utilities used throughout the runtime</li> <li>hal/:   Hardware Abstraction Layer for IREE's runtime, with   implementations for hardware and software backends</li> <li>schemas/:   Data storage format definitions, primarily using   FlatBuffers</li> <li>task/:   System for running tasks across multiple CPU threads</li> <li>tooling/:   Utilities for tests and developer tools, not suitable for use as-is in   downstream applications</li> <li>vm/:   Bytecode Virtual Machine used to work with IREE modules and invoke   IREE functions</li> </ul>"},{"location":"developers/general/developer-overview/#developer-tools","title":"Developer tools","text":"<p>IREE's core compiler accepts programs in supported input MLIR dialects (e.g. <code>stablehlo</code>, <code>tosa</code>, <code>linalg</code>). Import tools and APIs may be used to convert from framework-specific formats like TensorFlow SavedModel to MLIR modules. 
While programs are ultimately compiled down to modules suitable for running on some combination of IREE's target deployment platforms, IREE's developer tools can run individual compiler passes, translations, and other transformations step by step.</p>"},{"location":"developers/general/developer-overview/#iree-opt","title":"iree-opt","text":"<p><code>iree-opt</code> is a tool for testing IREE's compiler passes. It is similar to mlir-opt and runs sets of IREE's compiler passes on <code>.mlir</code> input files. See \"conversion\" in MLIR's Glossary for more information. Transformations performed by <code>iree-opt</code> can range from individual passes performing isolated manipulations to broad pipelines that encompass a sequence of steps.</p> <p>Test <code>.mlir</code> files that are checked in typically include a <code>RUN</code> block at the top of the file that specifies which passes should be performed and if <code>FileCheck</code> should be used to test the generated output.</p> <p>Here's an example of a small compiler pass running on a test file:</p> <pre><code>$ ../iree-build/tools/iree-opt \\\n  --split-input-file \\\n  --mlir-print-ir-before-all \\\n  --iree-util-drop-compiler-hints \\\n  $PWD/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir\n</code></pre> <p>For a more complex example, here's how to run IREE's complete transformation pipeline targeting the VMVX backend on the fullyconnected.mlir model file:</p> <pre><code>$ ../iree-build/tools/iree-opt \\\n  --iree-transformation-pipeline \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/tests/e2e/stablehlo_models/fullyconnected.mlir\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-compile","title":"iree-compile","text":"<p><code>iree-compile</code> is IREE's main compiler driver for generating binaries from supported input MLIR assembly.</p> <p>For example, to translate <code>simple_abs.mlir</code> to an IREE module:</p> <pre><code>$ 
../iree-build/tools/iree-compile \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/samples/models/simple_abs.mlir \\\n  -o /tmp/simple_abs_vmvx.vmfb\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-run-module","title":"iree-run-module","text":"<p>The <code>iree-run-module</code> program takes an already translated IREE module as input and executes an exported function using the provided inputs.</p> <p>This program can be used in sequence with <code>iree-compile</code> to translate a <code>.mlir</code> file to an IREE module and then execute it. Here is an example command that executes the simple <code>simple_abs_vmvx.vmfb</code> compiled from <code>simple_abs.mlir</code> above on IREE's local-task CPU device:</p> <pre><code>$ ../iree-build/tools/iree-run-module \\\n  --module=/tmp/simple_abs_vmvx.vmfb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=f32=-2\n</code></pre> <p>Input scalars are passed as <code>value</code> and input buffers are passed as <code>[shape]xtype=[value]</code>.</p> <ul> <li>Input buffers may also be read from raw binary files or Numpy npy files.</li> </ul> MLIR type Description Input example <code>i32</code> Scalar <code>--input=1234</code> <code>tensor&lt;i32&gt;</code> 0-D tensor <code>--input=i32=1234</code> <code>tensor&lt;1xi32&gt;</code> 1-D tensor (shape [1]) <code>--input=1xi32=1234</code> <code>tensor&lt;2xi32&gt;</code> 1-D tensor (shape [2]) <code>--input=\"2xi32=12 34\"</code> <code>tensor&lt;2x3xi32&gt;</code> 2-D tensor (shape [2, 3]) <code>--input=\"2x3xi32=[1 2 3][4 5 6]\"</code> Other usage examples <p>See these test files for advanced usage examples:</p> Basic testsInputsOutputsExpected <p>Source file: <code>tools/test/iree-run-module.mlir</code></p> tools/test/iree-run-module.mlir<pre><code>// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=\"2xf32=-2 3\") | FileCheck %s\n// RUN: (iree-compile 
--iree-hal-target-backends=llvm-cpu %s | iree-run-module --device=local-task --module=- --function=abs --input=\"2xf32=-2 3\") | FileCheck %s\n\n// CHECK-LABEL: EXEC @abs\nfunc.func @abs(%input : tensor&lt;2xf32&gt;) -&gt; (tensor&lt;2xf32&gt;) {\n  %result = math.absf %input : tensor&lt;2xf32&gt;\n  return %result : tensor&lt;2xf32&gt;\n}\n  // INPUT-BUFFERS: result[1]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: 2xf32=-2.0 3.0\n</code></pre> <p>Source file: <code>tools/test/iree-run-module-inputs.mlir</code></p> tools/test/iree-run-module-inputs.mlir<pre><code>// Passing no inputs is okay.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=no_input) | \\\n// RUN: FileCheck --check-prefix=NO-INPUT %s\n// NO-INPUT-LABEL: EXEC @no_input\nfunc.func @no_input() {\n  return\n}\n\n// -----\n\n// Scalars use the form `--input=value`. Type (float/int) should be omitted.\n//   * The VM does not use i1/i8 types, so i32 VM types are returned instead.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=- \\\n// RUN:                  --function=scalars \\\n// RUN:                  --input=1 \\\n// RUN:                  --input=5 \\\n// RUN:                  --input=1234 \\\n// RUN:                  --input=-3.14) | \\\n// RUN: FileCheck --check-prefix=INPUT-SCALARS %s\n// INPUT-SCALARS-LABEL: EXEC @scalars\nfunc.func @scalars(%arg0: i1, %arg1: i8, %arg2 : i32, %arg3 : f32) -&gt; (i1, i8, i32, f32) {\n  // INPUT-SCALARS: result[0]: i32=1\n  // INPUT-SCALARS: result[1]: i32=5\n  // INPUT-SCALARS: result[2]: i32=1234\n  // INPUT-SCALARS: result[3]: f32=-3.14\n  return %arg0, %arg1, %arg2, %arg3 : i1, i8, i32, f32\n}\n\n// -----\n\n// Buffers (\"tensors\") use the form `--input=[shape]xtype=[value]`.\n//   * If any values are omitted, zeroes will be used.\n//   * Quotes should be used around values with 
spaces.\n//   * Brackets may also be used to separate element values.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=- \\\n// RUN:                  --function=buffers \\\n// RUN:                  --input=i32=5 \\\n// RUN:                  --input=2xi32 \\\n// RUN:                  --input=\"2x3xi32=1 2 3 4 5 6\") | \\\n// RUN: FileCheck --check-prefix=INPUT-BUFFERS %s\n// INPUT-BUFFERS-LABEL: EXEC @buffers\nfunc.func @buffers(%arg0: tensor&lt;i32&gt;, %arg1: tensor&lt;2xi32&gt;, %arg2: tensor&lt;2x3xi32&gt;) -&gt; (tensor&lt;i32&gt;, tensor&lt;2xi32&gt;, tensor&lt;2x3xi32&gt;) {\n  // INPUT-BUFFERS: result[0]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: i32=5\n  // INPUT-BUFFERS: result[1]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: 2xi32=0 0\n  // INPUT-BUFFERS: result[2]: hal.buffer_view\n  // INPUT-BUFFERS-NEXT: 2x3xi32=[1 2 3][4 5 6]\n  return %arg0, %arg1, %arg2 : tensor&lt;i32&gt;, tensor&lt;2xi32&gt;, tensor&lt;2x3xi32&gt;\n}\n\n// -----\n\n// Buffer values can be read from binary files with `@some/file.bin`.\n//   * numpy npy files from numpy.save or previous tooling output can be read to\n//     provide 1+ values.\n//   * Some data types may be converted (i32 -&gt; si32 here) - bug?\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s -o=%t.vmfb &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=npy_round_trip \\\n// RUN:                  --input=2xi32=11,12 \\\n// RUN:                  --input=3xi32=1,2,3 \\\n// RUN:                  --output=@%t.npy \\\n// RUN:                  --output=+%t.npy &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=npy_round_trip \\\n// RUN:                  --input=*%t.npy) | \\\n// RUN: FileCheck --check-prefix=INPUT-NUMPY %s\n\n// 
INPUT-NUMPY-LABEL: EXEC @npy_round_trip\nfunc.func @npy_round_trip(%arg0: tensor&lt;2xi32&gt;, %arg1: tensor&lt;3xi32&gt;) -&gt; (tensor&lt;2xi32&gt;, tensor&lt;3xi32&gt;) {\n  // INPUT-NUMPY: result[0]: hal.buffer_view\n  // INPUT-NUMPY-NEXT: 2xsi32=11 12\n  // INPUT-NUMPY: result[1]: hal.buffer_view\n  // INPUT-NUMPY-NEXT: 3xsi32=1 2 3\n  return %arg0, %arg1 : tensor&lt;2xi32&gt;, tensor&lt;3xi32&gt;\n}\n</code></pre> <p>Source file: <code>tools/test/iree-run-module-outputs.mlir</code></p> tools/test/iree-run-module-outputs.mlir<pre><code>// Tests that execution providing no outputs is ok.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=no_output) | \\\n// RUN: FileCheck --check-prefix=NO-OUTPUT %s\n// NO-OUTPUT-LABEL: EXEC @no_output\nfunc.func @no_output() {\n  return\n}\n\n// -----\n\n// Tests the default output printing to stdout.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=default) | \\\n// RUN: FileCheck --check-prefix=OUTPUT-DEFAULT %s\n// OUTPUT-DEFAULT-LABEL: EXEC @default\nfunc.func @default() -&gt; (i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  // OUTPUT-DEFAULT: result[0]: i32=123\n  %0 = arith.constant 123 : i32\n  // OUTPUT-DEFAULT: result[1]: hal.buffer_view\n  // OUTPUT-DEFAULT-NEXT: f32=4\n  %1 = arith.constant dense&lt;4.0&gt; : tensor&lt;f32&gt;\n  // OUTPUT-DEFAULT: result[2]: hal.buffer_view\n  // OUTPUT-DEFAULT-NEXT: 2x4xi32=[0 1 2 3][4 5 6 7]\n  %2 = flow.tensor.dynamic_constant dense&lt;[[0,1,2,3],[4,5,6,7]]&gt; : tensor&lt;2x4xi32&gt; -&gt; tensor&lt;?x4xi32&gt;\n  return %0, %1, %2 : i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\n\n// -----\n\n// Tests explicit output to npy files by producing a concatenated .npy and then\n// printing the results in python. 
This also verifies our npy files can be\n// parsed by numpy.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \\\n// RUN:  iree-run-module --device=local-sync --module=- --function=numpy \\\n// RUN:                  --output= \\\n// RUN:                  --output=@%t.npy \\\n// RUN:                  --output=+%t.npy) &amp;&amp; \\\n// RUN:  \"%PYTHON\" %S/echo_npy.py %t.npy | \\\n// RUN: FileCheck --check-prefix=OUTPUT-NUMPY %s\nfunc.func @numpy() -&gt; (i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  // Output skipped:\n  %0 = arith.constant 123 : i32\n  // OUTPUT-NUMPY{LITERAL}: 4.0\n  %1 = arith.constant dense&lt;4.0&gt; : tensor&lt;f32&gt;\n  // OUTPUT-NUMPY-NEXT{LITERAL}: [[0 1 2 3]\n  // OUTPUT-NUMPY-NEXT{LITERAL}:  [4 5 6 7]]\n  %2 = flow.tensor.dynamic_constant dense&lt;[[0,1,2,3],[4,5,6,7]]&gt; : tensor&lt;2x4xi32&gt; -&gt; tensor&lt;?x4xi32&gt;\n  return %0, %1, %2 : i32, tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\n\n// -----\n\n// Tests output to binary files by round-tripping the output of a function into\n// another invocation reading from the binary files. Each output is written to\n// its own file (optimal for alignment/easier to inspect).\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s -o=%t.vmfb &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=write_binary \\\n// RUN:                  --output=@%t.0.bin \\\n// RUN:                  --output=@%t.1.bin &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=echo_binary \\\n// RUN:                  --input=f32=@%t.0.bin \\\n// RUN:                  --input=2x4xi32=@%t.1.bin) | \\\n// RUN: FileCheck --check-prefix=OUTPUT-BINARY %s\n\n// Tests output to binary files by round-tripping the output of a function into\n// another invocation reading from the binary files. 
The values are appended to\n// a single file and read from the single file.\n\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s -o=%t.vmfb &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=write_binary \\\n// RUN:                  --output=@%t.bin \\\n// RUN:                  --output=+%t.bin &amp;&amp; \\\n// RUN:  iree-run-module --device=local-sync \\\n// RUN:                  --module=%t.vmfb \\\n// RUN:                  --function=echo_binary \\\n// RUN:                  --input=f32=@%t.bin \\\n// RUN:                  --input=2x4xi32=+%t.bin) | \\\n// RUN: FileCheck --check-prefix=OUTPUT-BINARY %s\n\nfunc.func @write_binary() -&gt; (tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  %0 = arith.constant dense&lt;4.0&gt; : tensor&lt;f32&gt;\n  %1 = flow.tensor.dynamic_constant dense&lt;[[0,1,2,3],[4,5,6,7]]&gt; : tensor&lt;2x4xi32&gt; -&gt; tensor&lt;?x4xi32&gt;\n  return %0, %1 : tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\nfunc.func @echo_binary(%arg0: tensor&lt;f32&gt;, %arg1: tensor&lt;?x4xi32&gt;) -&gt; (tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;) {\n  // OUTPUT-BINARY{LITERAL}: f32=4\n  // OUTPUT-BINARY{LITERAL}: 2x4xi32=[0 1 2 3][4 5 6 7]\n  return %arg0, %arg1 : tensor&lt;f32&gt;, tensor&lt;?x4xi32&gt;\n}\n</code></pre> <p>Source file: <code>tools/test/iree-run-module-expected.mlir</code></p> tools/test/iree-run-module-expected.mlir<pre><code>// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=f32=2.0) | FileCheck %s --check-prefix=SUCCESS-MATCHES\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=\"(ignored)\") | FileCheck %s --check-prefix=SUCCESS-IGNORED\n// RUN: (iree-compile 
--iree-hal-target-backends=vmvx %s | iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=f32=2.1 --expected_f32_threshold=0.1) | FileCheck %s --check-prefix=SUCCESS-THRESHOLD\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | not iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=123 --expected_output=f32=2.0) | FileCheck %s --check-prefix=FAILED-FIRST\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | not iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=f32=4.5) | FileCheck %s --check-prefix=FAILED-SECOND\n// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | not iree-run-module --device=local-task --module=- --function=abs --input=f32=-2 --expected_output=f32=-2 --expected_output=4xf32=2.0) | FileCheck %s --check-prefix=FAILED-SHAPE\n\n// SUCCESS-MATCHES: [SUCCESS]\n// SUCCESS-THRESHOLD: [SUCCESS]\n// SUCCESS-IGNORED: [SUCCESS]\n// FAILED-FIRST: [FAILED] result[0]: element at index 0 (-2) does not match the expected (123)\n// FAILED-SECOND: [FAILED] result[1]: element at index 0 (2) does not match the expected (4.5)\n// FAILED-SHAPE: [FAILED] result[1]: metadata is f32; expected that the view matches 4xf32\n\nfunc.func @abs(%input: tensor&lt;f32&gt;) -&gt; (tensor&lt;f32&gt;, tensor&lt;f32&gt;) {\n  %result = math.absf %input : tensor&lt;f32&gt;\n  return %input, %result : tensor&lt;f32&gt;, tensor&lt;f32&gt;\n}\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-check-module","title":"iree-check-module","text":"<p>The <code>iree-check-module</code> program takes an already translated IREE module as input and executes it as a series of googletest tests. 
This is the test runner for the IREE check framework.</p> <pre><code>$ ../iree-build/tools/iree-compile \\\n  --iree-input-type=stablehlo \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/tests/e2e/stablehlo_ops/abs.mlir \\\n  -o /tmp/abs.vmfb\n</code></pre> <pre><code>$ ../iree-build/tools/iree-check-module \\\n  --device=local-task \\\n  --module=/tmp/abs.vmfb\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-run-mlir","title":"iree-run-mlir","text":"<p>The <code>iree-run-mlir</code> program takes a <code>.mlir</code> file as input, translates it to an IREE bytecode module, and executes the module.</p> <p>It is designed for testing and debugging, not production uses, and therefore does some additional work that usually must be explicit, like marking every function as exported by default and running all of them.</p> <p>For example, to execute the contents of samples/models/simple_abs.mlir:</p> <pre><code># iree-run-mlir &lt;compiler flags&gt; [input.mlir] &lt;runtime flags&gt;\n$ ../iree-build/tools/iree-run-mlir \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/samples/models/simple_abs.mlir \\\n  --input=f32=-2\n</code></pre>"},{"location":"developers/general/developer-overview/#iree-dump-module","title":"iree-dump-module","text":"<p>The <code>iree-dump-module</code> program prints the contents of an IREE module FlatBuffer file.</p> <p>For example, to inspect the module translated above:</p> <pre><code>../iree-build/tools/iree-dump-module /tmp/simple_abs_vmvx.vmfb\n</code></pre>"},{"location":"developers/general/developer-overview/#useful-generic-flags","title":"Useful generic flags","text":""},{"location":"developers/general/developer-overview/#read-inputs-from-a-file","title":"Read inputs from a file","text":"<p>All the IREE tools support reading input values from a file. This is quite useful for debugging. Use <code>--help</code> for each tool to see which flag to set. The inputs are expected to be newline-separated. 
Each input should be either a scalar or a buffer. Scalars should be in the format <code>type=value</code> and buffers should be in the format <code>[shape]xtype=[value]</code>. For example:</p> <pre><code>1x5xf32=1,-2,-3,4,-5\n1x5x3x1xf32=15,14,13,12,11,10,9,8,7,6,5,4,3,2,1\n</code></pre>"},{"location":"developers/general/developer-overview/#-iree-flow-trace-dispatch-tensors","title":"<code>--iree-flow-trace-dispatch-tensors</code>","text":"<p>This flag will enable tracing inputs and outputs for each dispatch function. It is easier to narrow down test cases, since IREE breaks an ML workload into multiple dispatch functions. When the flag is on, IREE will insert trace points before and after each dispatch function. The first trace op is for inputs, and the second trace op is for outputs. There will be two events for one dispatch function.</p>"},{"location":"developers/general/developer-tips/","title":"Developer tips and tricks","text":"<p>The IREE compiler is built using MLIR, so it naturally supports the common MLIR debugging workflows. For areas where IREE differentiates itself, this page lists other helpful tips and tricks.</p>"},{"location":"developers/general/developer-tips/#setting-compiler-options","title":"Setting compiler options","text":"<p>Tools such as <code>iree-compile</code> take options via command-line flags. 
Pass <code>--help</code> to see the full list:</p> <pre><code>$ iree-compile --help\n\nOVERVIEW: IREE compilation driver\n\nUSAGE: iree-compile [options] &lt;input file or '-' for stdin&gt;\n\nOPTIONS:\n  ...\n</code></pre> <p>Tip - Options and the Python bindings</p> <p>If you are using the Python bindings, options can be passed via the <code>extra_args=[\"--flag\"]</code> argument:</p> <pre><code>import iree.compiler as ireec\n\ninput_mlir = \"\"\"\nfunc.func @abs(%input : tensor&lt;f32&gt;) -&gt; (tensor&lt;f32&gt;) {\n  %result = math.absf %input : tensor&lt;f32&gt;\n  return %result : tensor&lt;f32&gt;\n}\"\"\"\n\ncompiled_module = ireec.tools.compile_str(\n    input_mlir,\n    target_backends=[\"llvm-cpu\"],\n    extra_args=[\"--mlir-timing\"])\n</code></pre>"},{"location":"developers/general/developer-tips/#inspecting-vmfb-files","title":"Inspecting <code>.vmfb</code> files","text":"<p>The IREE compiler generates FlatBuffer files using the <code>.vmfb</code> file extension, short for \"Virtual Machine FlatBuffer\", which can then be loaded and executed using IREE's runtime.</p> Info - other output formats <p>The IREE compiler can output different formats with the <code>--output-format=</code> flag:</p> Flag value Output <code>--output-format=vm-bytecode</code> (default) VM Bytecode (<code>.vmfb</code>) files <code>--output-format=vm-c</code> C source modules <p>VM Bytecode files are usable across a range of deployment scenarios, while C source modules provide low level connection points for constrained environments like bare metal platforms.</p> <p>By default, <code>.vmfb</code> files can be opened as zip files: (1)</p> <ol> <li>Setting <code>--iree-vm-emit-polyglot-zip=false</code> will disable this feature and    decrease file size slightly</li> </ol> <pre><code>$ unzip -d simple_abs_cpu ./simple_abs_cpu.vmfb\n\nArchive:  ./simple_abs_cpu.vmfb\n  extracting: simple_abs_cpu/module.fb\n  extracting: 
simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n</code></pre> <p>The embedded binary (here an ELF shared object with CPU code) can be parsed by standard tools:</p> <pre><code>$ readelf -Ws ./simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n\nSymbol table '.dynsym' contains 2 entries:\n  Num:    Value          Size Type    Bind   Vis      Ndx Name\n    0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND\n    1: 0000000000001760    17 FUNC    GLOBAL DEFAULT    7 iree_hal_executable_library_query\n\nSymbol table '.symtab' contains 42 entries:\n  Num:    Value          Size Type    Bind   Vis      Ndx Name\n    0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND\n    1: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS abs_dispatch_0\n    2: 0000000000001730    34 FUNC    LOCAL  DEFAULT    7 abs_dispatch_0_generic\n    3: 00000000000034c0    80 OBJECT  LOCAL  DEFAULT    8 iree_hal_executable_library_query_v0\n    4: 0000000000001780   111 FUNC    LOCAL  DEFAULT    7 iree_h2f_ieee\n    5: 00000000000017f0   207 FUNC    LOCAL  DEFAULT    7 iree_f2h_ieee\n    ...\n</code></pre> <p>The <code>iree-dump-module</code> tool can also be used to see information about a given <code>.vmfb</code> file:</p> <pre><code>$ iree-dump-module simple_abs.vmfb\n\n//===---------------------------------------------------------------------===//\n// @module : version 0\n//===---------------------------------------------------------------------===//\n\nRequired Types:\n  [  0] i32\n  [  1] i64\n  [  2] !hal.allocator\n  [  3] !hal.buffer\n  ...\n\nModule Dependencies:\n  hal, version &gt;= 0, required\n\nImported Functions:\n  [  0] hal.allocator.allocate(!vm.ref&lt;?&gt;, i32, i32, i64) -&gt; (!vm.ref&lt;?&gt;)\n  [  1] hal.devices.get(i32) -&gt; (!vm.ref&lt;?&gt;)\n  ...\n\nExported Functions:\n  [  0] abs(!vm.ref&lt;?&gt;) -&gt; (!vm.ref&lt;?&gt;)\n  [  1] __init() -&gt; 
()\n\n...\n</code></pre>"},{"location":"developers/general/developer-tips/#dumping-executable-files","title":"Dumping executable files","text":"<p>The <code>--iree-hal-dump-executable-*</code> flags instruct the compiler to save files related to \"executable translation\" (code generation for a specific hardware target) into a directory of your choosing. If you are interested in seeing which operations in your input program were fused into a compute kernel or what device code was generated for a given program structure, these flags are a great starting point.</p> Flag Files dumped <code>iree-hal-dump-executable-files-to</code> All files (meta-flag) <code>iree-hal-dump-executable-sources-to</code> Source <code>.mlir</code> files prior to HAL compilation <code>iree-hal-dump-executable-intermediates-to</code> Intermediate files (e.g. <code>.o</code> files, <code>.mlir</code> stages) <code>iree-hal-dump-executable-binaries-to</code> Binary files (e.g. <code>.so</code>, <code>.spv</code>, <code>.ptx</code>), as used in the <code>.vmfb</code> <code>iree-hal-dump-executable-benchmarks-to</code> Standalone benchmark files for <code>iree-benchmark-module</code> CPUGPU - VulkanGPU - CUDA <pre><code>$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-llvmcpu-link-embedded=false \\\n  --iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_cpu.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_system_elf_x86_64_benchmark.mlir\nmodule_abs_dispatch_0_system_elf_x86_64.codegen.bc\nmodule_abs_dispatch_0_system_elf_x86_64.linked.bc\nmodule_abs_dispatch_0_system_elf_x86_64.optimized.bc\nmodule_abs_dispatch_0_system_elf_x86_64.o\nmodule_abs_dispatch_0_system_elf_x86_64.s\nmodule_abs_dispatch_0_system_elf_x86_64.so\nsimple_abs_cpu.vmfb\n</code></pre> <p>Tip - Embedded and system linking</p> <p>The default value of 
<code>--iree-llvmcpu-link-embedded=true</code> generates embedded ELF files. By disabling that flag, the compiler will produce platform-standard <code>.so</code> files for Linux, <code>.dll</code> files for Windows, etc. While embedded ELF files can be smaller and more portable, inspection of artifacts is easier with platform-standard shared object files.</p> Tip - Disassembling <code>.bc</code> files with <code>llvm-dis</code> <p>The <code>.bc</code> intermediate files use the LLVM BitCode format, which can be disassembled using <code>llvm-dis</code>:</p> <pre><code>// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_system_elf_x86_64.codegen.bc\n$ cat module_abs_dispatch_0_system_elf_x86_64.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_system_elf_x86_64.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\ntarget triple = \"x86_64-linux-gnu\"\n\n%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 }\n%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 }\n\n...\n\ndefine internal i32 @abs_dispatch_0_generic(\n    ptr noalias nonnull align 16 %0,\n    ptr noalias nonnull align 16 %1,\n    ptr noalias nonnull align 16 %2) #0 {\n  %4 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n  %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t %4, 10,\n  %6 = load ptr, ptr %5, align 8,\n  %7 = ptrtoint ptr %6 to i64,\n  %8 = and i64 %7, 63,\n  %9 = icmp eq i64 %8, 0,\n  call void @llvm.assume(i1 %9),\n  %10 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n  %11 = extractvalue %iree_hal_executable_dispatch_state_v0_t %10, 10,\n  %12 = getelementptr ptr, ptr %11, i32 1,\n  %13 = load ptr, ptr %12, align 8,\n  %14 = ptrtoint ptr %13 to i64,\n  %15 = and i64 %14, 63,\n  %16 = icmp eq i64 %15, 0,\n  call void @llvm.assume(i1 %16),\n  %17 = load float, ptr %6, align 4,\n  
%18 = call float @llvm.fabs.f32(float %17),\n  store float %18, ptr %13, align 4,\n  ret i32 0,\n}\n\n...\n</code></pre> <pre><code>$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=vulkan-spirv \\\n  --iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_vulkan.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb_benchmark.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.spv\nsimple_abs_vulkan.vmfb\n</code></pre> Tip - Disassembling <code>.spv</code> files with <code>spirv-dis</code> <p>The <code>.spv</code> files use the SPIR-V binary format, which can be disassembled using <code>spirv-dis</code> from SPIR-V Tools:</p> <pre><code>$ cd /tmp/iree/simple_abs/\n$ spirv-dis module_abs_dispatch_0_vulkan_spirv_fb.spv\n\n; SPIR-V\n; Version: 1.0\n; Generator: Khronos; 22\n; Bound: 20\n; Schema: 0\n              OpCapability Shader\n              OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n        %18 = OpExtInstImport \"GLSL.std.450\"\n              OpMemoryModel Logical GLSL450\n              OpEntryPoint GLCompute %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n              OpExecutionMode %abs_dispatch_0_generic LocalSize 1 1 1\n              OpName %__resource_var_0_0_ \"__resource_var_0_0_\"\n              OpName %__resource_var_0_1_ \"__resource_var_0_1_\"\n              OpName %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n              OpDecorate %_arr_float_uint_1 ArrayStride 4\n              OpMemberDecorate %_struct_2 0 Offset 0\n              OpDecorate %_struct_2 Block\n              OpDecorate %__resource_var_0_0_ Binding 0\n              OpDecorate %__resource_var_0_0_ DescriptorSet 0\n              OpDecorate %__resource_var_0_1_ Binding 1\n              OpDecorate %__resource_var_0_1_ DescriptorSet 0\n      %float = OpTypeFloat 32\n      %uint = OpTypeInt 
32 0\n    %uint_1 = OpConstant %uint 1\n%_arr_float_uint_1 = OpTypeArray %float %uint_1\n  %_struct_2 = OpTypeStruct %_arr_float_uint_1\n%_ptr_StorageBuffer__struct_2 = OpTypePointer StorageBuffer %_struct_2\n%__resource_var_0_0_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n%__resource_var_0_1_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n      %void = OpTypeVoid\n          %9 = OpTypeFunction %void\n    %uint_0 = OpConstant %uint 0\n%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float\n%abs_dispatch_0_generic = OpFunction %void None %9\n        %12 = OpLabel\n        %15 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_0_ %uint_0 %uint_0\n        %16 = OpLoad %float %15\n        %17 = OpExtInst %float %18 FAbs %16\n        %19 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_1_ %uint_0 %uint_0\n              OpStore %19 %17\n              OpReturn\n              OpFunctionEnd\n</code></pre> <pre><code>$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=cuda \\\n  --iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_cuda.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0_cuda_nvptx_fb_benchmark.mlir\nmodule_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.linked.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.optimized.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.ptx\nmodule_abs_dispatch_0.mlir\nsimple_abs_cuda.vmfb\n</code></pre> Tip - Disassembling <code>.bc</code> files with <code>llvm-dis</code> <p>The <code>.bc</code> intermediate files use the LLVM BitCode format, which can be disassembled using <code>llvm-dis</code>:</p> <pre><code>// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\n$ cat 
module_abs_dispatch_0_cuda_nvptx_fb.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\n\ndeclare ptr @malloc(i64)\n\ndeclare void @free(ptr)\n\ndeclare float @__nv_fabsf(float)\n\ndefine void @abs_dispatch_0_generic(ptr noalias readonly align 16 %0, ptr noalias align 16 %1) {\n  %3 = ptrtoint ptr %0 to i64\n  %4 = and i64 %3, 63\n  %5 = icmp eq i64 %4, 0\n  call void @llvm.assume(i1 %5)\n  %6 = ptrtoint ptr %1 to i64\n  %7 = and i64 %6, 63\n  %8 = icmp eq i64 %7, 0\n  call void @llvm.assume(i1 %8)\n  %9 = load float, ptr %0, align 4\n  %10 = call float @__nv_fabsf(float %9)\n  store float %10, ptr %1, align 4\n  ret void\n}\n\n!nvvm.annotations = !{!0, !1, !2, !3}\n\n!0 = !{ptr @abs_dispatch_0_generic, !\"kernel\", i32 1}\n!1 = !{ptr @abs_dispatch_0_generic, !\"maxntidx\", i32 1}\n!2 = !{ptr @abs_dispatch_0_generic, !\"maxntidy\", i32 1}\n!3 = !{ptr @abs_dispatch_0_generic, !\"maxntidz\", i32 1}\n</code></pre>"},{"location":"developers/general/developer-tips/#module-level-executable-benchmarks","title":"Module level executable benchmarks","text":"<p>The benchmark files produced by <code>--iree-hal-dump-executable-benchmarks-to</code> can be compiled in isolation and passed to <code>iree-benchmark-module</code>, where they exercise the full IREE runtime for a single executable:</p> <pre><code>$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-hal-dump-executable-benchmarks-to=/tmp/iree/simple_abs/ \\\n  -o /dev/null\n\n$ iree-compile \\\n  /tmp/iree/simple_abs/module_abs_dispatch_0_embedded_elf_x86_64_benchmark.mlir \\\n  -o /tmp/iree/simple_abs/module_abs_dispatch_0_benchmark.vmfb\n\n$ iree-benchmark-module \\\n  /tmp/iree/simple_abs/module_abs_dispatch_0_benchmark.vmfb\n</code></pre>"},{"location":"developers/general/developer-tips/#low-level-executable-binary-benchmarks","title":"Low level executable binary benchmarks","text":"<p>The binary files produced by 
<code>--iree-hal-dump-executable-binaries-to</code> can be passed to <code>iree-benchmark-executable</code> where they are benchmarked directly, without using the IREE VM, HAL APIs, task system, etc. Note that this interface is much lower level and you must specify all push constants / binding parameters manually:</p> <pre><code>$ iree-compile \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-hal-dump-executable-binaries-to=/tmp/iree/simple_abs/ \\\n  -o /dev/null\n\n$ iree-benchmark-executable \\\n  --device=local-sync \\\n  --executable_format=embedded-elf-x86_64 \\\n  --executable_file=/tmp/iree/simple_abs/module_abs_dispatch_0_embedded_elf_x86_64.so \\\n  --entry_point=0 \\\n  --binding=f32=-2.5 \\\n  --binding=f32=0 \\\n  --workgroup_count=1,1,1\n</code></pre> <p>See the comments in <code>tools/iree-benchmark-executable-main.c</code> and the test file at <code>tools/test/iree-benchmark-executable.mlir</code> for more information and examples.</p>"},{"location":"developers/general/developer-tips/#compiling-phase-by-phase","title":"Compiling phase by phase","text":"<p>IREE compiles programs through a series of broad phases:</p> <pre><code>graph LR\n  accTitle: Compilation phases overview\n  accDescr: Input to ABI to Flow to Stream to HAL to VM\n\n  A([Input])\n  A --&gt; B([ABI])\n  B --&gt; C([Flow])\n  C --&gt; D([Stream])\n  D --&gt; E([HAL])\n  E --&gt; F([VM])</code></pre> Tip - available phases <p>These are the phase names available for use with the <code>--compile-to</code> and <code>--compile-from</code> flags described below:</p> Phase name Description <code>input</code> Performs input processing and lowering into core IREE input dialects (linalg/etc) <code>abi</code> Adjusts the program ABI for the specified execution environment <code>preprocessing</code> Applies customizable <code>preprocessing</code> prior to Flow/Stream/HAL/VM <code>flow</code> Models execution data flow and partitioning using the <code>flow</code> dialect <code>stream</code> 
Models execution partitioning and scheduling using the <code>stream</code> dialect <code>executable-sources</code> Prepares <code>hal</code> dialect executables for translation, prior to codegen <code>executable-targets</code> Runs code generation for <code>hal</code> dialect executables <code>hal</code> Finishes <code>hal</code> dialect processing <code>vm</code> Lowers to IREE's abstract virtual machine using the <code>vm</code> dialect <code>end</code> Completes the full compilation pipeline <p>For an accurate list of phases, see the source code or check the help output with a command such as:</p> <pre><code>iree-compile --help | sed -n '/--compile-to/,/--/p' | head -n -1\n</code></pre> <p>You can output a program snapshot at intermediate phases with the <code>--compile-to=&lt;phase name&gt;</code> flag:</p> <pre><code>$ cat simple_abs.mlir\n\nfunc.func @abs(%input : tensor&lt;f32&gt;) -&gt; (tensor&lt;f32&gt;) {\n  %result = math.absf %input : tensor&lt;f32&gt;\n  return %result : tensor&lt;f32&gt;\n}\n\n$ iree-compile simple_abs.mlir --compile-to=abi\n\nmodule {\n  func.func @abs(%arg0: !hal.buffer_view) -&gt; !hal.buffer_view attributes {iree.abi.stub} {\n    %0 = hal.tensor.import %arg0 \"input 0\" : !hal.buffer_view -&gt; tensor&lt;f32&gt;\n    %1 = math.absf %0 : tensor&lt;f32&gt;\n    %2 = hal.tensor.export %1 \"output 0\" : tensor&lt;f32&gt; -&gt; !hal.buffer_view\n    return %2 : !hal.buffer_view\n  }\n}\n</code></pre> <p>This is similar to the <code>--mlir-print-ir-after=</code> flag, but at clearly defined pipeline phases.</p> <p>Compilation can be continued from any intermediate phase. 
This allows for iterative workflows - compile to a phase, make edits to the <code>.mlir</code> file, then resume compilation and continue through the pipeline:</p> <pre><code>$ iree-compile simple_abs.mlir --compile-to=abi -o simple_abs_abi.mlir\n\n$ sed \\\n  -e 's/math.absf/math.exp/' \\\n  -e 's/@abs/@exp/' \\\n  simple_abs_abi.mlir &gt; simple_exp_abi.mlir\n\n$ iree-compile simple_exp_abi.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  -o simple_exp_cpu.vmfb\n</code></pre> <p>or explicitly resume from an intermediate phase with <code>--compile-from=&lt;phase name&gt;</code>:</p> <pre><code>$ iree-compile simple_exp_abi.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --compile-from=abi \\\n  -o simple_exp_cpu.vmfb\n</code></pre>"},{"location":"developers/general/developer-tips/#dumping-compilation-phases","title":"Dumping compilation phases","text":"<p>The <code>--dump-compilation-phases-to</code> flag can be used to dump program IR after each phase, much like <code>--compile-to</code> but without exiting early:</p> <pre><code>$ iree-compile simple_abs.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --dump-compilation-phases-to=/tmp/iree/simple_abs \\\n  -o /tmp/iree/simple_abs/simple_abs_cpu.vmfb\n\n$ ls /tmp/iree/simple_abs -1v\n\nsimple_abs.1.input.mlir\nsimple_abs.2.abi.mlir\nsimple_abs.3.preprocessing.mlir\nsimple_abs.4.global-optimization.mlir\nsimple_abs.5.flow.mlir\nsimple_abs.6.stream.mlir\nsimple_abs.7.executable-sources.mlir\nsimple_abs.8.executable-configurations.mlir\nsimple_abs.9.executable-targets.mlir\nsimple_abs.10.hal.mlir\nsimple_abs.11.vm.mlir\n</code></pre> <p>As with <code>--compile-to</code>, these files can be used together with <code>--compile-from</code>:</p> <pre><code>$ iree-compile simple_abs.2.abi.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --compile-from=abi \\\n  -o simple_exp_cpu.vmfb\n</code></pre> <p>All together, these passes can be used to, for example:</p> <ul> <li>speed up triage (\"at which phase 
do we go wrong\")</li> <li>allow for faster development iteration (snapshot all phases at some baseline,   modify the compiler source, then resume from just before where those changes   impact a pipeline)</li> </ul>"},{"location":"developers/general/release-management/","title":"Release management","text":"<p>IREE cuts automated releases via a workflow that is triggered daily. The only constraint placed on the commit that is released is that it has passed all CI checks. These are published on GitHub with the \"pre-release\" status. For debugging this process, see the Release debugging playbook.</p> <p>We periodically promote one of these candidates to a \"stable\" release by removing the \"pre-release\" status. This makes it show up as a \"latest\" release on GitHub. We also push the Python packages for this release to PyPI.</p>"},{"location":"developers/general/release-management/#picking-a-candidate-to-promote","title":"Picking a candidate to promote","text":"<p>When selecting a candidate we use the following criteria:</p> <ol> <li>\u2a864 days old so that problems with it may have been spotted</li> <li>Contains no P0 regressions vs the previous stable release</li> <li>LLVM submodule commit ideally exists upstream (no cherry picks or patches)</li> </ol> <p>When you've identified a potential candidate, email the iree-discuss list with the proposal and solicit feedback. People may point out known regressions or request that some feature make the cut.</p>"},{"location":"developers/general/release-management/#promoting-a-candidate-to-stable","title":"Promoting a candidate to stable","text":"<ol> <li> <p>(Authorized users only) Push to PyPI using     pypi_deploy.sh</p> <ul> <li>For Googlers, the password is stored at http://go/iree-pypi-password</li> </ul> </li> <li> <p>Open the release on GitHub. 
Rename the release from \"candidate\" to \"stable\",     uncheck the option for \"pre-release\", and check the option for \"latest\".</p> <p></p> <p></p> </li> </ol>"},{"location":"developers/general/testing-guide/","title":"Testing guide","text":"<p>Like the IREE project in general, IREE tests are divided into a few different components and use different tooling depending on the needs of that component.</p> Test type Test Build system Supported platforms Compiler tests iree_lit_test Bazel/CMake Host Runtime tests iree_cc_test Bazel/CMake Host/Device iree_native_test Bazel/CMake Host/Device iree_hal_cts_test_suite CMake Host/Device Core E2E tests iree_check_test Bazel/CMake Host/Device iree_static_linker_test CMake Host/Device <p>There are also more <code>*_test_suite</code> targets that groups test targets with the same configuration together.</p>"},{"location":"developers/general/testing-guide/#compiler-tests","title":"Compiler tests","text":"<p>Tests for the IREE compilation pipeline are written as lit tests in the same style as MLIR.</p> <p>By convention, IREE includes tests for</p> <ul> <li>printing and parsing of ops in <code>.../IR/test/{OP_CATEGORY}_ops.mlir</code> files</li> <li>folding and canonicalization in <code>.../IR/test/{OP_CATEGORY}_folding.mlir</code> files</li> <li>compiler passes and pipelines in other <code>.../test/*.mlir</code> files</li> </ul>"},{"location":"developers/general/testing-guide/#running-a-test","title":"Running a test","text":"<p>For the test <code>iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir</code></p> <p>With CMake, run this from the build directory:</p> <pre><code>ctest -R iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir.test\n</code></pre> <p>With Bazel, run this from the repo root:</p> <pre><code>bazel test 
//compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/test:arithmetic_ops.mlir.test\n</code></pre>"},{"location":"developers/general/testing-guide/#writing-a-test","title":"Writing a test","text":"<p>For advice on writing MLIR compiler tests, see the MLIR testing guide. Tests should be <code>.mlir</code> files in <code>test</code> directory adjacent to the functionality they are testing. Instead of <code>mlir-opt</code>, use <code>iree-opt</code>, which registers IREE dialects and passes and doesn't register some unnecessary core ones.</p> <p>As with most parts of the IREE compiler, these should not have a dependency on the runtime.</p>"},{"location":"developers/general/testing-guide/#configuring-the-build-system","title":"Configuring the build system","text":"<p>In the Bazel BUILD file, create a <code>iree_lit_test_suite</code> rule. We usually create a single suite that globs all <code>.mlir</code> files in the directory and is called \"lit\".</p> <pre><code>load(\"//iree/build_tools/bazel:iree_lit_test.bzl\", \"iree_lit_test_suite\")\n\niree_lit_test_suite(\n    name = \"lit\",\n    srcs = glob([\"*.mlir\"]),\n    tools = [\n        \"@llvm-project//llvm:FileCheck\",\n        \"//tools:iree-opt\",\n    ],\n)\n</code></pre> <p>There is a corresponding CMake function, calls to which will be generated by our Bazel to CMake converter.</p> <pre><code>iree_lit_test_suite(\n  NAME\n    lit\n  SRCS\n    \"arithmetic_ops.mlir\"\n  DATA\n    FileCheck\n    iree-opt\n)\n</code></pre> <p>You can also create a test for a single file with <code>iree_lit_test</code>.</p>"},{"location":"developers/general/testing-guide/#runtime-tests","title":"Runtime tests","text":"<p>Tests for the runtime C++ code use the GoogleTest testing framework. 
They should generally follow the style and best practices of that framework.</p>"},{"location":"developers/general/testing-guide/#running-a-test_1","title":"Running a test","text":"<p>For the test <code>/runtime/src/iree/base/bitfield_test.cc</code>:</p> <p>With CMake, run this from the build directory:</p> <pre><code>ctest -R iree/base/bitfield_test\n</code></pre> <p>With Bazel, run this from the repo root:</p> <pre><code>bazel test //runtime/src/iree/base:bitfield_test\n</code></pre>"},{"location":"developers/general/testing-guide/#setting-test-environments","title":"Setting test environments","text":"<p>Parallel testing for <code>ctest</code> can be enabled via the <code>CTEST_PARALLEL_LEVEL</code> environment variable. For example:</p> <pre><code>export CTEST_PARALLEL_LEVEL=$(nproc)\n</code></pre> <p>To use the Vulkan backend as the test driver, you may need to select between a Vulkan implementation from SwiftShader and multiple Vulkan-capable hardware devices. This can be done via environment variables. See the generic Vulkan setup page for details regarding these variables.</p> <p>For Bazel, you can persist the configuration in <code>user.bazelrc</code> to save typing. For example:</p> <pre><code>test:vkswiftshader --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vkswiftshader --test_env=\"VK_LAYER_PATH=...\"\ntest:vknative --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vknative --test_env=\"VK_LAYER_PATH=...\"\n</code></pre> <p>Then you can use <code>bazel test --config=vkswiftshader</code> to select SwiftShader as the Vulkan implementation. Similarly for other implementations.</p>"},{"location":"developers/general/testing-guide/#writing-a-test_1","title":"Writing a test","text":"<p>For advice on writing tests in the GoogleTest framework, see the GoogleTest primer. Test files for source file <code>foo.cc</code> with build target <code>foo</code> should live in the same directory with source file <code>foo_test.cc</code> and build target <code>foo_test</code>. 
You should <code>#include</code> <code>iree/testing/gtest.h</code> instead of any of the gtest or gmock headers.</p> <p>As with all parts of the IREE runtime, these should not have a dependency on the compiler.</p>"},{"location":"developers/general/testing-guide/#configuring-the-build-system_1","title":"Configuring the build system","text":"<p>In the Bazel BUILD file, create a <code>cc_test</code> target with your test file as the source and any necessary dependencies. Usually, you can link in a standard gtest main function. Use <code>iree/testing:gtest_main</code> instead of the <code>gtest_main</code> that comes with gtest.</p> <pre><code>cc_test(\n    name = \"arena_test\",\n    srcs = [\"arena_test.cc\"],\n    deps = [\n        \":arena\",\n        \"//iree/testing:gtest_main\",\n    ],\n)\n</code></pre> <p>We have created a corresponding CMake function <code>iree_cc_test</code> that mirrors the Bazel rule's behavior. Our Bazel to CMake converter should generally derive the <code>CMakeLists.txt</code> file from the BUILD file:</p> <pre><code>iree_cc_test(\n  NAME\n    arena_test\n  SRCS\n    \"arena_test.cc\"\n  DEPS\n    ::arena\n    iree::testing::gtest_main\n)\n</code></pre> <p>There are other more specific test targets, such as <code>iree_hal_cts_test_suite</code>, which are designed to test specific runtime support with template configuration and is not supported by Bazel rules.</p>"},{"location":"developers/general/testing-guide/#iree-core-end-to-end-e2e-tests","title":"IREE core end-to-end (e2e) tests","text":"<p>Here \"end-to-end\" means from the input accepted by the IREE core compiler (dialects like TOSA, StableHLO, Linalg) to execution using the IREE runtime components. It does not include tests of the integrations with ML frameworks (e.g. TensorFlow, PyTorch) or bindings to other languages (e.g. Python).</p> <p>We avoid using the more traditional <code>lit</code> tests used elsewhere in the compiler for runtime execution tests. 
Lit tests require running the compiler tools on the test platform through shell or python scripts that act on files from a local file system. On platforms like Android, the web, and embedded systems, each of these features is either not available or is severely limited.</p> <p>Instead, to test these flows we use a custom framework called <code>check</code>. The check framework compiles test programs on the host machine into standalone test binary files that can be pushed to test devices (such as Android phones) where they run with gtest style assertions (e.g. <code>check.expect_almost_eq(lhs, rhs)</code>).</p>"},{"location":"developers/general/testing-guide/#building-e2e-tests","title":"Building e2e tests","text":"<p>The files needed by these tests are not built by default with CMake. You'll need to build the special <code>iree-test-deps</code> target to generate test files prior to running CTest (from the build directory):</p> <pre><code>cmake --build . --target iree-test-deps\n</code></pre> <p>To run e2e model tests in generated_e2e_model_tests.cmake, because of their dependencies, <code>-DIREE_BUILD_E2E_TEST_ARTIFACTS=ON</code> needs to be set when configuring CMake. 
Also see IREE Benchmark Suite Prerequisites for required packages.</p>"},{"location":"developers/general/testing-guide/#running-a-test_2","title":"Running a Test","text":"<p>For the test <code>tests/e2e/stablehlo_ops/floor.mlir</code> compiled for the VMVX target backend and running on the VMVX driver (here they match exactly, but in principle there's a many-to-many mapping from backends to drivers).</p> <p>With CMake, run this from the build directory:</p> <pre><code>ctest -R tests/e2e/stablehlo_ops/check_vmvx_local-task_floor.mlir\n</code></pre> <p>With Bazel, run this from the repo root:</p> <pre><code>bazel test tests/e2e/stablehlo_ops:check_vmvx_local-task_floor.mlir\n</code></pre>"},{"location":"developers/general/testing-guide/#setting-test-environments_1","title":"Setting test environments","text":"<p>Similarly, you can use environment variables to select Vulkan implementations for running tests as explained in the Runtime tests section.</p>"},{"location":"developers/general/testing-guide/#writing-a-test_2","title":"Writing a test","text":"<p>These tests live in <code>tests/e2e</code>. 
A single test consists of a <code>.mlir</code> source file specifying an IREE module where each exported function takes no inputs and returns no results and corresponds to a single test case.</p> <p>As an example, here are some tests for the MHLO floor operation:</p> <pre><code>func.func @tensor() {\n  %input = util.unfoldable_constant dense&lt;[0.0, 1.1, 2.5, 4.9]&gt; : tensor&lt;4xf32&gt;\n  %result = \"mhlo.floor\"(%input) : (tensor&lt;4xf32&gt;) -&gt; tensor&lt;4xf32&gt;\n  check.expect_almost_eq_const(%result, dense&lt;[0.0, 1.0, 2.0, 4.0]&gt; : tensor&lt;4xf32&gt;): tensor&lt;4xf32&gt;\n  return\n}\n\nfunc.func @scalar() {\n  %input = util.unfoldable_constant dense&lt;101.3&gt; : tensor&lt;f32&gt;\n  %result = \"mhlo.floor\"(%input) : (tensor&lt;f32&gt;) -&gt; tensor&lt;f32&gt;\n  check.expect_almost_eq_const(%result, dense&lt;101.0&gt; : tensor&lt;f32&gt;): tensor&lt;f32&gt;\n  return\n}\n\nfunc.func @negative() {\n  %input = util.unfoldable_constant dense&lt;-1.1&gt; : tensor&lt;f32&gt;\n  %result = \"mhlo.floor\"(%input) : (tensor&lt;f32&gt;) -&gt; tensor&lt;f32&gt;\n  check.expect_almost_eq_const(%result, dense&lt;-2.0&gt; : tensor&lt;f32&gt;): tensor&lt;f32&gt;\n  return\n}\n</code></pre> <p>Test cases are created in gtest for each public function exported by the module.</p> <p>Note the use of <code>util.unfoldable_constant</code> to specify test constants. If we were to use a regular constant the compiler would fold away everything at compile time and our test would not actually test the runtime. <code>unfoldable_constant</code> adds a barrier that prevents folding. To prevent folding/constant propagate on an arbitrary SSA-value you can use <code>util.optimization_barrier</code>.</p> <p>Next we use this input constant to exercise the runtime feature under test (in this case, just a single floor operation). Finally, we use a check dialect operation to make an assertion about the output. There are a few different assertion operations. 
Here we use the <code>expect_almost_eq_const</code> op: almost because we are comparing floats and want to allow for floating-point imprecision, and const because we want to compare it to a constant value. This last part is just syntactic sugar around</p> <pre><code>%expected = arith.constant dense&lt;101.0&gt; : tensor&lt;f32&gt;\ncheck.expect_almost_eq(%result, %expected) : tensor&lt;f32&gt;\n</code></pre> <p>The output of running this test looks like:</p> <pre><code>[==========] Running 4 tests from 1 test suite.\n[----------] Global test environment set-up.\n[----------] 4 tests from module\n[ RUN      ] module.tensor\n[       OK ] module.tensor (76 ms)\n[ RUN      ] module.scalar\n[       OK ] module.scalar (79 ms)\n[ RUN      ] module.double\n[       OK ] module.double (55 ms)\n[ RUN      ] module.negative\n[       OK ] module.negative (54 ms)\n[----------] 4 tests from module (264 ms total)\n\n[----------] Global test environment tear-down\n[==========] 4 tests from 1 test suite ran. (264 ms total)\n[  PASSED  ] 4 tests.\n</code></pre> <p>The \"module\" name for the test suite comes from the default name for an implicit MLIR module. To give the test suite a more descriptive name, use an explicit named top-level module in this file.</p>"},{"location":"developers/general/testing-guide/#configuring-the-build-system_2","title":"Configuring the build system","text":"<p>A single <code>.mlir</code> source file can be turned into a test target with the <code>iree_check_test</code> Bazel macro (and corresponding CMake function).</p> <pre><code>load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test\")\n\niree_check_test(\n    name = \"check_vmvx_local-task_floor.mlir\",\n    src = \"floor.mlir\",\n    driver = \"local-task\",\n    target_backend = \"vmvx\",\n)\n</code></pre> <p>The target naming convention is \"check_backend_driver_src\". 
The generated test will automatically be tagged with a \"driver=vmvx\" tag, which can help filter tests by backend (especially when many tests are generated, as below).</p> <p>Usually we want to create a suite of tests across many backends and drivers. This can be accomplished with additional macros. For a single backend/driver pair:</p> <pre><code>load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_single_backend_test_suite\")\n\niree_check_single_backend_test_suite(\n    name = \"check_vmvx_local-task\",\n    srcs = glob([\"*.mlir\"]),\n    driver = \"local-task\",\n    target_backend = \"vmvx\",\n)\n</code></pre> <p>This will generate a separate test target for each file in <code>srcs</code> with a name following the convention above as well as a Bazel test_suite called \"check_vmvx_local-task\" that will run all the generated tests.</p> <p>You can also generate suites across multiple pairs:</p> <pre><code>load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test_suite\")\n\niree_check_test_suite(\n    name = \"check\",\n    srcs = [\"success.mlir\"],\n    # Leave this argument off to run on all supported backend/driver pairs.\n    target_backends_and_drivers = [\n        (\"vmvx\", \"local-task\"),\n        (\"vulkan-spirv\", \"vulkan\"),\n    ],\n)\n</code></pre> <p>This will create a test per source file and backend/driver pair, a test suite per backend/driver pair, and a test suite, \"check\", that will run all the tests.</p> <p>The CMake functions follow a similar pattern. The calls to them are generated in our <code>CMakeLists.txt</code> file by bazel_to_cmake.</p> <p>There are other test targets that generate tests based on template configuration and platform detection, such as <code>iree_static_linker_test</code>. 
Those targets are not supported by Bazel rules at this point.</p>"},{"location":"developers/general/testing-guide/#external-test-suite","title":"External test suite","text":"<p>An out-of-tree test suite is under development at nod-ai/SHARK-TestSuite for large collections of generated tests and machine learning models that are too large to fit into the main git repository.</p> <p>Testing these programs follows several stages:</p> <pre><code>graph LR\n  Import -. \"\\n(offline)\" .-&gt; Compile\n  Compile --&gt; Run</code></pre> <p>This particular test suite treats importing (e.g. from ONNX, PyTorch, or TensorFlow) as an offline step and contains test cases organized into folders of programs, inputs, and expected outputs:</p> Sample test case directory<pre><code>test_case_name/\n  model.mlir\n  input_0.npy\n  output_0.npy\n  test_data_flags.txt\n</code></pre> Sample test_data_flags.txt<pre><code>--input=@input_0.npy\n--expected_output=@output_0.npy\n</code></pre> <ul> <li>Many model, input, and output files are too large to store directly in Git, so the external test suite also uses Git LFS and cloud storage.</li> </ul> <p>Each test case can be run using a sequence of commands like:</p> <pre><code>iree-compile model.mlir {flags} -o model.vmfb\niree-run-module --module=model.vmfb --flagfile=test_data_flags.txt\n</code></pre> <p>To run slices of the test suite, a pytest runner is included that can be configured using JSON files. 
The JSON files tested in the IREE repo itself are stored in <code>build_tools/pkgci/external_test_suite/</code>.</p> <p>For example, here is part of a config file for running ONNX tests on CPU:</p> build_tools/pkgci/external_test_suite/onnx_cpu_llvm_sync.json<pre><code>{\n  \"config_name\": \"cpu_llvm_sync\",\n  \"iree_compile_flags\": [\n    \"--iree-hal-target-backends=llvm-cpu\"\n  ],\n  \"iree_run_module_flags\": [\n    \"--device=local-sync\"\n  ],\n  \"skip_compile_tests\": [\n    \"test_dequantizelinear\",\n    \"test_slice_default_axes\"\n  ],\n  \"skip_run_tests\": [],\n  \"expected_compile_failures\": [\n    \"test_acos\",\n    \"test_acos_example\",\n    \"test_acosh\",\n    \"test_acosh_example\",\n    \"test_adagrad\",\n    \"test_adagrad_multiple\",\n</code></pre>"},{"location":"developers/general/testing-guide/#adding-new-test-cases","title":"Adding new test cases","text":"<p>To add new test cases to the external test suite:</p> <ol> <li>Import the programs you want to test into MLIR. This can be done manually or    using automation. Prefer to automate, or at least document, the process so    test cases can be regenerated later.</li> <li>Construct sets of inputs and expected outputs (as .npy or .bin files). These    can be manually authored or imported by running the program through a    reference backend.</li> <li>Group the program, inputs, and outputs together using a flagfile.</li> </ol> <p>To start running new test cases:</p> <ol> <li>Bump the commit of the test suite that is used in IREE's    <code>.github/workflows/</code> files</li> <li>Add new pytest invocations and/or config files that run the new tests</li> </ol>"},{"location":"developers/general/testing-guide/#usage-from-other-projects","title":"Usage from other projects","text":"<p>The external test suite only needs <code>iree-compile</code> and <code>iree-run-module</code> to run, so it is well suited for use in downstream projects that implement plugins for IREE. 
The <code>conftest.py</code> file can also be forked (or bypassed entirely) to further customize the test runner behavior.</p>"},{"location":"developers/performance/benchmark-suites/","title":"Benchmark suites","text":"<p>IREE Benchmark Suites is a collection of benchmarks for IREE developers to track performance improvements/regressions during development.</p> <p>The benchmark suites are run for each commit on the main branch and the results are uploaded to https://perf.iree.dev for regression analysis (for the currently supported targets). On pull requests, users can add labels <code>benchmarks:*</code> to trigger the benchmark runs. The results will be compared with https://perf.iree.dev and posted in the comments.</p> <p>Information about the definitions of the benchmark suites can be found in the IREE Benchmark Suites Configurations.</p>"},{"location":"developers/performance/benchmark-suites/#running-benchmark-suites-locally","title":"Running benchmark suites locally","text":""},{"location":"developers/performance/benchmark-suites/#prerequisites","title":"Prerequisites","text":"<p>Install <code>iree-import-tf</code> and <code>iree-import-tflite</code> in your Python environment (see Tensorflow Integration and TFLite Integration).</p>"},{"location":"developers/performance/benchmark-suites/#choose-benchmark-presets","title":"Choose benchmark presets","text":"<p>IREE Benchmark Suites contain many benchmarks for different devices and model sizes, which can take lots of space and time to build all of them. So benchmarks are grouped into presets to allow building and running only a subset of them. 
The available presets are:</p> <p>Execution benchmarks:</p> <ul> <li><code>android-cpu</code>: benchmarks for mobile CPUs</li> <li><code>android-gpu</code>: benchmarks for mobile GPUs</li> <li><code>cuda</code>: benchmarks for CUDA with a small model set</li> <li><code>cuda-large</code>: benchmarks for CUDA with a large model set</li> <li><code>vulkan-nvidia</code>: benchmarks for Vulkan on NVIDIA graphics cards</li> <li><code>x86_64</code>: benchmarks for x86_64 CPUs with a small model set</li> <li><code>x86_64-large</code>: benchmarks for x86_64 with a large model set</li> </ul> <p>Compilation benchmarks (to collect compilation statistics, such as module sizes):</p> <ul> <li><code>comp-stats</code>: compilation benchmarks with a small model set</li> <li><code>comp-stats-large</code>: compilation benchmark with a large model set</li> </ul> <p>Note that <code>*-large</code> presets will download and build a few hundreds GBs of artifacts.</p> <p>Set the environment variables of benchmark presets for the steps below, for example:</p> <pre><code>export EXECUTION_BENCHMARK_PRESETS=\"cuda,x86_64\"\nexport COMPILATION_BENCHMARK_PRESETS=\"comp-stats\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#build-benchmark-suites","title":"Build benchmark suites","text":"<p>Configure IREE with <code>-DIREE_BUILD_E2E_TEST_ARTIFACTS=ON</code>:</p> <pre><code>cmake -GNinja -B \"${IREE_BUILD_DIR?}\" -S \"${IREE_REPO?}\" \\\n  -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n  -DCMAKE_C_COMPILER=clang \\\n  -DCMAKE_CXX_COMPILER=clang++ \\\n  -DIREE_ENABLE_LLD=ON \\\n  -DIREE_BUILD_E2E_TEST_ARTIFACTS=ON\n</code></pre> <p>If you only need the imported MLIR models:</p> <pre><code>cmake --build \"${IREE_BUILD_DIR?}\" --target \\\n  iree-benchmark-import-models\n  # For large benchmarks (this will take &gt; 100G disk space)\n  # iree-benchmark-import-models-large\n</code></pre> <p>Otherwise, compile the benchmark suites and tools for benchmarking:</p> <pre><code>cmake --build 
\"${IREE_BUILD_DIR?}\" --target \\\n  iree-benchmark-suites \\\n  # If any *-large preset is enabled, also build this target:\n  # iree-benchmark-suites-large \\\n  iree-benchmark-module\nexport E2E_TEST_ARTIFACTS_DIR=\"${IREE_BUILD_DIR?}/e2e_test_artifacts\"\n</code></pre> <p>TODO(#13683): Each preset should have its own target to further reduce unnecessary builds</p>"},{"location":"developers/performance/benchmark-suites/#run-benchmarks","title":"Run benchmarks","text":"<p>Export the execution benchmark config:</p> <pre><code>build_tools/benchmarks/export_benchmark_config.py execution \\\n  --benchmark_presets=\"${EXECUTION_BENCHMARK_PRESETS?}\" \\\n  &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n</code></pre> <p>Run benchmarks (currently only support running on a Linux host):</p> <pre><code>build_tools/benchmarks/run_benchmarks_on_linux.py \\\n  --normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n  --target_device_name=\"&lt;target_device_name, e.g. c2-standard-60&gt;\" \\\n  --output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n  --verbose \\\n  --cpu_uarch=\"&lt;host CPU uarch, e.g. 
CascadeLake&gt;\"\n# Traces can be collected by adding:\n# --traced_benchmark_tool_dir=\"${IREE_TRACED_BUILD_DIR?}/tools\" \\\n# --trace_capture_tool=/path/to/iree-tracy-capture \\\n# --capture_tarball=captured_tracy_files.tar.gz\n</code></pre> <p>Note that:</p> <ul> <li><code>&lt;target_device_name&gt;</code> selects a benchmark group targets a specific device:<ul> <li>Common options:<ul> <li><code>c2-standard-60</code> for x86_64 CPU benchmarks.</li> <li><code>a2-highgpu-1g</code> for NVIDIA GPU benchmarks.</li> </ul> </li> <li>All device names are defined under     build_tools/python/e2e_test_framework/device_specs.</li> </ul> </li> <li>To run x86_64 benchmarks, right now <code>--cpu_uarch</code> needs to be provided and     only <code>CascadeLake</code> is available currently.</li> <li>To build traced benchmark tools, see     Profiling with Tracy.</li> </ul> <p>Filters can be used to select the benchmarks:</p> <pre><code>build_tools/benchmarks/run_benchmarks_on_linux.py \\\n  --normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n  --target_device_name=\"c2-standard-60\" \\\n  --output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n  --verbose \\\n  --cpu_uarch=\"CascadeLake\" \\\n  --model_name_regex=\"MobileBert*\" \\\n  --driver_filter_regex='local-task' \\\n  --mode_regex=\"4-thread\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#generate-compilation-statistics-compilation-benchmarks","title":"Generate compilation statistics (compilation benchmarks)","text":"<p>Export the compilation benchmark config:</p> <pre><code>build_tools/benchmarks/export_benchmark_config.py compilation \\\n  --benchmark_presets=\"${COMPILATION_BENCHMARK_PRESETS?}\" \\\n  &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n</code></pre> <p>Generate the compilation statistics:</p> 
<pre><code>build_tools/benchmarks/collect_compilation_statistics.py \\\n  --compilation_benchmark_config=comp_config.json \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --build_log=\"${IREE_BUILD_DIR?}/.ninja_log\" \\\n  --output=\"${E2E_TEST_ARTIFACTS_DIR?}/compile_stats_results.json\"\n</code></pre> <p>Note that you need to use Ninja to build the benchmark suites as the tool collects information from its build log.</p>"},{"location":"developers/performance/benchmark-suites/#show-execution-compilation-benchmark-results","title":"Show execution / compilation benchmark results","text":"<p>If you want to generate a comparison report locally, you can use diff_local_benchmarks.py script to compare two result json files and generate the report. For example:</p> <pre><code>build_tools/benchmarks/diff_local_benchmarks.py \\\n  --base \"${E2E_TEST_ARTIFACTS_DIR?}/before_benchmark_results.json\" \\\n  --target \"${E2E_TEST_ARTIFACTS_DIR?}/after_benchmark_results.json\" \\\n  &gt; report.md\n</code></pre> <p>An example that compares compilation statistics:</p> <pre><code>build_tools/benchmarks/diff_local_benchmarks.py \\\n  --base-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/before_compile_stats_results.json\" \\\n  --target-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/after_compile_stats_results.json\" \\\n  &gt; report.md\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#find-compile-and-run-commands-to-reproduce-benchmarks","title":"Find compile and run commands to reproduce benchmarks","text":"<p>Each benchmark has its benchmark ID in the benchmark suites, you will see a benchmark ID at:</p> <ul> <li>In the serie's URL of https://perf.iree.dev<ul> <li>Execution benchmark: <code>https://perf.iree.dev/serie?IREE?&lt;benchmark_id&gt;</code></li> <li>Compilation benchmark:     <code>https://perf.iree.dev/serie?IREE?&lt;benchmark_id&gt;-&lt;metric_id&gt;</code></li> </ul> </li> <li>In <code>benchmark_results.json</code> and 
<code>compile_stats_results.json</code><ul> <li>Execution benchmark result has a field <code>run_config_id</code></li> <li>Compilation benchmark result has a field <code>gen_config_id</code></li> </ul> </li> <li>In PR benchmark summary or the markdown generated by     <code>diff_local_benchmarks.py</code>, each benchmark has the link to its     https://perf.iree.dev URL, which includes the benchmark ID.</li> </ul> <p>If you don't have artifacts locally, see Fetching Benchmark Artifacts from CI to find the GCS directory of the CI artifacts. Then fetch the needed files:</p> <pre><code># Get ${E2E_TEST_ARTIFACTS_DIR_URL} from \"Fetching Benchmark Artifacts from CI\".\nexport E2E_TEST_ARTIFACTS_DIR=\"e2e_test_artifacts\"\n\n# Download all artifacts\nmkdir \"${E2E_TEST_ARTIFACTS_DIR?}\"\ngcloud storage cp -r \"${E2E_TEST_ARTIFACTS_DIR_URL?}\" \"${E2E_TEST_ARTIFACTS_DIR?}\"\n</code></pre> <p>Run the helper tool to dump benchmark commands from benchmark configs:</p> <pre><code>build_tools/benchmarks/benchmark_helper.py dump-cmds \\\n  --execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/execution-benchmark-config.json\" \\\n  --compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/compilation-benchmark-config.json\" \\\n  --e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n  --benchmark_id=\"&lt;benchmark_id&gt;\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#get-full-list-of-benchmarks","title":"Get full list of benchmarks","text":"<p>The commands below output the full list of execution and compilation benchmarks, including the benchmark names and their flags:</p> <pre><code>build_tools/benchmarks/export_benchmark_config.py execution &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\nbuild_tools/benchmarks/export_benchmark_config.py compilation &gt; \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\nbuild_tools/benchmarks/benchmark_helper.py dump-cmds \\\n  
--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n  --compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n</code></pre>"},{"location":"developers/performance/benchmark-suites/#fetching-benchmark-artifacts-from-ci","title":"Fetching benchmark Artifacts from CI","text":""},{"location":"developers/performance/benchmark-suites/#1-find-the-corresponding-ci-workflow-run","title":"1. Find the corresponding CI workflow run","text":"<p>On the commit of the benchmark run, you can find the list of the workflow jobs by clicking the green check mark. Click any job that starts with <code>CI /</code>:</p> <p></p>"},{"location":"developers/performance/benchmark-suites/#2-get-urls-of-gcs-artifacts","title":"2. Get URLs of GCS artifacts","text":"<p>On the CI page, click <code>Summary</code> on the top-left to open the summary page. Scroll down and the links to artifacts are listed in a section titled \"Artifact Links\". Paste the content in your shell to define all needed variables for the following steps:</p> <p></p>"},{"location":"developers/performance/benchmark-suites/#3-fetch-the-benchmark-artifacts","title":"3. Fetch the benchmark artifacts","text":"<p>To fetch files from the GCS URL, the gcloud CLI tool (https://cloud.google.com/sdk/docs/install) can list the directory contents and download files (see https://cloud.google.com/sdk/gcloud/reference/storage for more usages). 
If you want to use CI artifacts to reproduce benchmarks locally, see Find Compile and Run Commands to Reproduce Benchmarks.</p> <p>Assume you get the GCS URL variables from Get URLs of GCS artifacts.</p> <p>Download artifacts:</p> <pre><code># The GCS directory has the same structure as your local ${IREE_BUILD_DIR?}/e2e_test_artifacts.\ngcloud storage ls \"${E2E_TEST_ARTIFACTS_DIR_URL?}\"\n\n# Download all source and imported MLIR files:\ngcloud storage cp \"${E2E_TEST_ARTIFACTS_DIR_URL?}/*.mlir\" \"&lt;target_dir&gt;\"\n</code></pre> <p>Execution and compilation benchmark configs can be downloaded at:</p> <pre><code># Execution benchmark config:\ngcloud storage cp \\\n  \"${E2E_TEST_ARTIFACTS_DIR_URL?}/execution-benchmark-config.json\" \\\n  \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n\n# Compilation benchmark config:\ngcloud storage cp \\\n  \"${E2E_TEST_ARTIFACTS_DIR_URL?}/compilation-benchmark-config.json\" \\\n  \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n</code></pre> <p>Benchmark raw results and traces can be downloaded at:</p> <pre><code># Execution benchmark raw results\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-results-*.json\" .\n\n# Optional: Merge raw results into a single file\nbuild_tools/benchmarks/benchmark_helper.py merge-results benchmark-results-*.json &gt; benchmark_results.json\n\n# Execution benchmark traces\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-traces-*.tar.gz\" .\n\n# Compilation benchmark results\ngcloud storage cp \"${COMPILATION_BENCHMARK_RESULTS_URL?}\" .\n</code></pre>"},{"location":"developers/performance/benchmarking/","title":"Benchmarking","text":"<p>IREE uses benchmarks to inspect performance at varying levels of granularity. Benchmarking is implemented using the Google Benchmark library. 
To understand performance details and guide optimization, please refer to the IREE profiling documentation.</p>"},{"location":"developers/performance/benchmarking/#module-benchmarks","title":"Module Benchmarks","text":"<p><code>iree-benchmark-module</code> is a program accepting (almost) the same inputs as <code>iree-run-module</code> that will benchmark the invocation of a single entry function. It measures timing for the whole process of invoking a function through the VM, including allocating and freeing output buffers. This is a high-level benchmark of an entire invocation flow. It provides a big picture view, but depends on many different variables, like an integration test. For finer-grained measurements more akin to unit tests, see Executable Benchmarks.</p> <p>To use <code>iree-benchmark-module</code>, generate an IREE module for the target backend:</p> <pre><code>$ bazel run //tools:iree-compile -- \\\n  --iree-hal-target-backends=vmvx \\\n  $PWD/samples/models/simple_abs.mlir \\\n  -o /tmp/module.fb\n</code></pre> <p>and then benchmark an exported function in that module:</p> <pre><code>$ bazel run //tools:iree-benchmark-module -- \\\n  --module=/tmp/module.fb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=f32=-2\n</code></pre> <p>You'll see output like</p> <pre><code>Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n  L1 Data 32K (x6)\n  L1 Instruction 32K (x6)\n  L2 Unified 1024K (x6)\n  L3 Unified 8448K (x1)\nLoad Average: 2.21, 1.93, 3.34\n***WARNING*** CPU scaling is enabled, the benchmark real time measurements may\n be noisy and will incur extra overhead.\n***WARNING*** Library was built as DEBUG. 
Timings may be affected.\n------------------------------------------------------------------------------\nBenchmark                                    Time             CPU   Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time       0.22 ms         0.23 ms         3356\n</code></pre> <p>Notice that there are a few warnings in there (you may not see all of these). The benchmark library helpfully warns about some common issues that will affect benchmark timing. When trying to obtain real benchmark numbers, you should generally build an optimized build (<code>-c opt</code> in Bazel) and disable CPU scaling.</p> <pre><code>bazel build -c opt //tools:iree-benchmark-module\n</code></pre> <p>Another thing to consider is that depending on where you are running the benchmark you might want to avoid additional programs running at the same time. Bazel itself runs a server even when it's not being actively invoked that can be quite a memory hog, so we'll instead invoke the binary directly. Use your favorite process manager (e.g. 
htop or pkill on Linux) to kill heavy-weight programs such as Chrome and Bazel.</p> <p>Now we'll actually invoke the binary:</p> <pre><code>$ ./bazel-bin/tools/iree-benchmark-module \\\n  --module=/tmp/module.fb \\\n  --device=local-task \\\n  --function=abs \\\n  --input=f32=-2\n</code></pre> <pre><code>Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n  L1 Data 32K (x6)\n  L1 Instruction 32K (x6)\n  L2 Unified 1024K (x6)\n  L3 Unified 8448K (x1)\nLoad Average: 1.49, 3.42, 3.49\n------------------------------------------------------------------------------\nBenchmark                                    Time             CPU   Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time      0.011 ms        0.014 ms        61654\n</code></pre> <p>Remember to restore CPU scaling when you're done.</p>"},{"location":"developers/performance/benchmarking/#executable-benchmarks","title":"Executable Benchmarks","text":"<p>We also benchmark the performance of individual parts of the IREE system in isolation. IREE breaks a model down to dispatch functions. 
To benchmark all the dispatch functions, generate an IREE module with the <code>-iree-flow-export-benchmark-funcs</code> flag set:</p> <pre><code>$ build/tools/iree-compile \\\n  --iree-input-type=stablehlo \\\n  --iree-flow-export-benchmark-funcs \\\n  --iree-hal-target-backends=vmvx \\\n  tests/e2e/stablehlo_models/fullyconnected.mlir \\\n  -o /tmp/fullyconnected.vmfb\n</code></pre> <p>and then benchmark all exported dispatch functions (and all exported functions) in that module:</p> <pre><code>$ build/tools/iree-benchmark-module\n  --module=/tmp/fullyconnected.vmfb\n  --device=local-task\n</code></pre> <p>If no <code>entry_function</code> is specified, <code>iree-benchmark-module</code> will register a benchmark for each exported function that takes no inputs.</p> <p>You will see output like:</p> <pre><code>Run on (72 X 3700 MHz CPU s)\nCPU Caches:\n  L1 Data 32 KiB (x36)\n  L1 Instruction 32 KiB (x36)\n  L2 Unified 1024 KiB (x36)\n  L3 Unified 25344 KiB (x2)\nLoad Average: 4.39, 5.72, 6.76\n---------------------------------------------------------------------------------------------\nBenchmark                                                   Time             CPU   Iterations\n---------------------------------------------------------------------------------------------\nBM_main_ex_dispatch_0_benchmark/process_time/real_time  0.030 ms        0.037 ms        34065\nBM_main_ex_dispatch_1_benchmark/process_time/real_time  0.034 ms        0.042 ms        20567\nBM_main_ex_dispatch_2_benchmark/process_time/real_time  0.043 ms        0.051 ms        18576\nBM_main_ex_dispatch_3_benchmark/process_time/real_time  0.029 ms        0.036 ms        21345\nBM_main_ex_dispatch_4_benchmark/process_time/real_time  0.042 ms        0.051 ms        15880\nBM_main_ex_dispatch_5_benchmark/process_time/real_time  0.030 ms        0.037 ms        17854\nBM_main_ex_dispatch_6_benchmark/process_time/real_time  0.043 ms        0.052 ms        
14919\nBM_main_benchmark/process_time/real_time                0.099 ms        0.107 ms         5892\n</code></pre>"},{"location":"developers/performance/benchmarking/#bytecode-module-benchmarks","title":"Bytecode Module Benchmarks","text":"<p>Normally, the IREE VM is expected to be integrated into applications and driving model execution. So its performance is of crucial importance. We strive to introduce as little overhead as possible and have several benchmark binaries dedicated for evaluating the VM's performance. These benchmark binaries are named as <code>*_benchmark</code> in the <code>iree/vm/</code> directory. They also use the Google Benchmark library as the above.</p>"},{"location":"developers/performance/benchmarking/#cpu-configuration","title":"CPU Configuration","text":"<p>When benchmarking, it's important to consider the configuration of your CPUs. Most notably, CPU scaling can give variable results, so you'll usually want to disable it. This can get pretty complex, but the most basic thing to do is to run all CPUs at maximum frequency. The other thing to consider is what CPU(s) your program is running on. Both of these get more complicated on mobile and in multithreaded workloads.</p>"},{"location":"developers/performance/benchmarking/#linux","title":"Linux","text":"<p>Google benchmark provides some instructions. Note that the library will print \"CPU scaling is enabled\" warnings for any configuration that doesn't have the quota governor set to performance. Similarly the CPU frequency it reports is the maximum frequency of cpu0, not the frequency of the processor it's actually running on. 
This means that more advanced configurations should ignore these messages.</p> <p>Turn off CPU scaling before benchmarking.</p> <pre><code>sudo cpupower frequency-set --governor performance\n</code></pre> <p>Restore CPU scaling after benchmarking:</p> <pre><code>sudo cpupower frequency-set --governor powersave\n</code></pre> <p>To learn more about different quota governor settings, see https://www.kernel.org/doc/Documentation/cpu-freq/governors.txt. To restrict which CPUs you run on, use the <code>taskset</code> command which takes a hexadecimal mask.</p> <p>To only run on the lowest-numbered CPU you can run</p> <pre><code>taskset 1 sleep 20 &amp;\n</code></pre> <p>You can confirm that the process is running on the given CPU:</p> <pre><code>ps -o psr $!\n</code></pre> <p>Note that <code>$!</code> indicates the process ID of the last executed background command, so you can only use this shorthand if you didn't run any commands after the sleep. For more info on taskset, see https://linux.die.net/man/1/taskset.</p>"},{"location":"developers/performance/benchmarking/#android","title":"Android","text":"<p>Read and understand the Linux instructions first.</p> <p>Android doesn't give us quite as nice tooling, but the principle is basically the same. One important difference is that thermal throttling is a much bigger concern on mobile. Without a cooling plate, it is likely that high clock speeds will overheat the device and engage thermal throttling, which will ignore whatever clock speeds you may have set to prevent things from catching on fire. Therefore the naive approach above is likely not a good idea.</p> <p>You will likely need to be root (use <code>su</code> or <code>adb root</code>). The commands will depend on your exact phone and number of cores. First play around and make sure you understand what everything means. 
Note that each CPU has its own files which are used to control its behavior, but changes to a single CPU will sometimes affect others (see <code>/sys/devices/system/cpu/cpu0/cpufreq/affected_cpus</code>).</p> <p>Some useful files:</p> <pre><code>/proc/cpuinfo\n/sys/devices/system/cpu/possible\n/sys/devices/system/cpu/present\n/sys/devices/system/cpu/cpu0/online\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq\n/sys/devices/system/cpu/cpu0/cpufreq/affected_cpus\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed\n</code></pre> <p>See the clockspeed of each CPU</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n    paste \\\n      \"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_cur_freq\" \\\n      \"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_min_freq\" \\\n      \"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_max_freq\"; \\\ndone\n</code></pre> <p>Before changing things, make sure to check the current scaling governor settings first so you can put them back when you're done.</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n    cat \"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n</code></pre>"},{"location":"developers/performance/benchmarking/#single-core-example","title":"Single-Core Example","text":"<p>Here's an example to run IREE in a single-threaded context on CPU 7 at its lowest clock speed.</p> <p>First we'll take control of the clockspeed by setting the governor to \"userspace\".</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n  echo userspace &gt; \\\n    
\"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n</code></pre> <p>We can now set individual clock speeds. We'll pin cpu7 to its minimum frequency. We choose the minimum instead of the maximum here to mitigate thermal throttling concerns.</p> <pre><code>$ cat /sys/devices/system/cpu/cpu7/cpufreq/cpuinfo_min_freq &gt; \\\n/sys/devices/system/cpu/cpu7/cpufreq/scaling_setspeed\n</code></pre> <p>We can confirm the frequencies of all the CPUs by running the same command above. Now to run a command specifically on cpu7, use <code>taskset 80</code> (hex for binary 10000000):</p> <pre><code>taskset 80 sleep 20 &amp;\nps -o psr $!\n</code></pre> <p>Remember to clean up when you're done! Here we'll set the scaling governor back to schedutil because that's what they were before on the particular device this was tested on, but that may not exist on all devices.</p> <pre><code>$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\n  echo schedutil &gt; \\\n    \"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n</code></pre>"},{"location":"developers/performance/benchmarking/#android-scripts","title":"Android Scripts","text":"<p>We provide a few scripts to set clockspeeds on Android (under <code>build_tools/benchmarks</code>). 
These are somewhat device-specific:</p> <ul> <li>The <code>set_android_scaling_governor.sh</code> work on all CPUs, but the default   governor name may be different across devices.</li> <li>The <code>set_*_gpu_scaling_policy.sh</code> script used should match the actual GPU on   your device.</li> </ul> <p>Sample configuration steps for Pixel 6:</p> <ol> <li>Copy all scripts to the device:</li> </ol> <pre><code>adb push build_tools/benchmarks/*.sh /data/local/tmp\n</code></pre> <ol> <li>Launch interactive adb shell as super user:</li> </ol> <pre><code>adb shell\noriole:/ # su\noriole:/ # cd /data/local/tmp\n</code></pre> <ol> <li>Pin frequencies (high clockspeeds):</li> </ol> <pre><code>oriole:/ # ./set_android_scaling_governor.sh\n CPU info (before changing governor):\n cpu     governor        cur     min     max\n ------------------------------------------------\n cpu0    sched_pixel     1098000 300000  1803000\n cpu1    sched_pixel     1598000 300000  1803000\n cpu2    sched_pixel     1598000 300000  1803000\n cpu3    sched_pixel     1098000 300000  1803000\n cpu4    sched_pixel     400000  400000  2253000\n cpu5    sched_pixel     400000  400000  2253000\n cpu6    sched_pixel     500000  500000  2802000\n cpu7    sched_pixel     500000  500000  2802000\n Setting CPU frequency governor to performance\n CPU info (after changing governor):\n cpu     governor        cur     min     max\n ------------------------------------------------\n cpu0    performance     1803000 300000  1803000\n cpu1    performance     1803000 300000  1803000\n cpu2    performance     1803000 300000  1803000\n cpu3    performance     1803000 300000  1803000\n cpu4    performance     2253000 400000  2253000\n cpu5    performance     2253000 400000  2253000\n cpu6    performance     2802000 500000  2802000\n cpu7    performance     2802000 500000  2802000\noriole:/data/local/tmp # ./set_pixel6_gpu_scaling_policy.sh\n GPU info (before changing frequency scaling policy):\n policy                 
                 cur     min     max\n --------------------------------------------------------------\n coarse_demand [adaptive] always_on      251000  151000  848000\n Setting GPU frequency scaling policy to performance\n GPU info (after changing frequency scaling policy):\n policy                                  cur     min     max\n --------------------------------------------------------------\n coarse_demand adaptive [always_on]      848000  151000  848000\n</code></pre> <ol> <li>Restore default frequencies:</li> </ol> <pre><code>oriole:/ # ./set_android_scaling_governor.sh sched_pixel\n...\noriole:/ # ./set_pixel6_gpu_scaling_policy.sh default\n...\n</code></pre> <p>TODO(scotttodd): Windows instructions</p>"},{"location":"developers/performance/profiling-cpu-events/","title":"Profiling CPUs","text":"<p>CPUs are able to record certain events that may be relevant when investigating the performance of a program. A common example of such an event is a \"cache miss\", when the program tries to access data in memory that isn't already in some CPU cache, causing that access to be slower than it could otherwise be.</p> <p>Querying and analyzing this data can be useful, but is hard in two distinct ways:</p> <ul> <li>Depending on the CPU and on the OS, both hardware and software limitations can   get in the way of obtaining accurate data.</li> <li>This data tends to be inherently difficult to interpret, even when it is   perfectly accurate. 
In practice it is often noisy and inaccurate, which makes   interpretation even more complicated.</li> </ul> <p>There are two parts to this page: platform-specific information about how to query this data, and, at the end, a platform-independent explanation of how to interpret it.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#perf-and-simpleperf-on-linux-and-android","title":"Perf and Simpleperf, on Linux and Android","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#overview","title":"Overview","text":"<p>The Linux kernel exposes system event counters to user-space programs by means of the <code>perf_event_open</code> system call. This includes both hardware event counters (such as CPU cache events) and software events from the kernel (such as page faults and context switches). Anyone may use this system call to implement a profiler, but Linux readily offers one, <code>perf</code>.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#preserving-artifacts","title":"Preserving artifacts","text":"<p>By default IREE cleans up any temporary files it creates while running. Tools like perf, however, require those files exist even after the process has exited. The environment variable <code>IREE_PRESERVE_DYLIB_TEMP_FILES</code> can be set to preserve the files. This is only needed for the CPU path when using the system loader.</p> <pre><code>export IREE_PRESERVE_DYLIB_TEMP_FILES=1\n</code></pre>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#desktop-linux","title":"Desktop linux","text":"<p>On desktop Linux we can use <code>perf</code>. It is provided on most Linux distributions, for instance on Debian-based distributions do:</p> <pre><code>sudo apt install linux-perf\n</code></pre> <p>Run the program to be profiled, prepending its command line with <code>perf record</code>. 
By default this will write the profile data to the current directory, <code>./perf.data</code>. Sometimes this isn't ideal, such as when the current directory is under version control. Explicit paths can be specified by the <code>-o</code> flag to direct the output of <code>perf record</code>, and then by the <code>-i</code> flag to select the input of subsequent commands analyzing the profile. Example:</p> <pre><code>perf record -o /tmp/perf.data \\\n  ./tools/iree-benchmark-module \\\n    --device=local-task \\\n    ... command-line arguments of iree-benchmark-module as usual ...\n</code></pre> <p>By default, this samples time spent. One may specify instead an event to sample by, with the <code>-e</code> flag. For instance, to sample by L1 cache misses, one may do:</p> <pre><code>perf record -o /tmp/perf.data -e L1-dcache-load-misses \\\n  ./tools/iree-benchmark-module \\\n    --device=local-task \\\n    ... command-line arguments of iree-benchmark-module as usual ...\n</code></pre> <p><code>perf list</code> dumps the list of event types.</p> <p>Once you have recorded a profile, there are two main ways to analyze it: <code>perf report</code> and <code>perf annotate</code>.</p> <p><code>perf report</code> breaks down the event counts by symbol. In the default case where what was sampled was time, this is just an ordinary profile by symbol name, no different than what could be viewed in other profilers such as Tracy. Where it gets really interesting is when the profile was recording a specific event type, as in the above <code>-e L1-dcache-load-misses</code> example:</p> <pre><code>perf report -i /tmp/perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', Event count (approx.): 362571861\nOverhead  Command          Shared Object              Symbol\n  61.53%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_31\n  13.30%  cpu0             dylib_executablenzpx2Q.so  [.] 
serving_default_ex_dispatch_11\n   2.11%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_13\n   1.90%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_19\n   1.54%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_25\n   1.49%  cpu0             dylib_executablenzpx2Q.so  [.] serving_default_ex_dispatch_5\n</code></pre> <p><code>perf annotate</code> breaks down the event counts by instruction. Again, in the default case where what was sampled was time, this is no different than what could be viewed in Tracy, and the real motivation to use <code>perf</code> is when profiling by specific event types as in the above <code>-e L1-dcache-load-misses</code> example:</p> <pre><code>perf annotate -i perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', 4000 Hz, Event count (approx.): 362571861\nserving_default_ex_dispatch_31  /tmp/dylib_executablenzpx2Q.so [Percent: local period]\n  1.66 \u2502        movups -0x1000(%rdi),%xmm10\n  0.48 \u2502        movups -0x800(%rdi),%xmm9\n  0.82 \u2502        movups (%rdi),%xmm8\n  0.49 \u2502        movaps %xmm1,%xmm4\n  0.12 \u2502        shufps $0x0,%xmm1,%xmm4\n  0.14 \u2502        mulps  %xmm5,%xmm4\n  0.28 \u2502        addps  %xmm6,%xmm4\n  0.60 \u2502        movaps %xmm3,%xmm6\n  0.34 \u2502        shufps $0x0,%xmm3,%xmm6\n</code></pre>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#warning","title":"Warning","text":"<p><code>perf annotate</code> is even noisier than <code>perf report</code> as it can be overly optimistic, depending on the CPU, to pin an event to a specific instruction. Typically, this works fairly well on x86 CPUs and less well on ARM CPUs and more generally on anything mobile. 
Even on a desktop x86 CPU, this is noisy, as the above example (recorded on a Skylake workstation) shows: it blamed a <code>mulps %xmm5,%xmm4</code> instruction for a cache miss, which doesn't make sense as that instruction only touches registers.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#android","title":"Android","text":"<p>On Android we can use <code>simpleperf</code>. It's preinstalled on current Android <code>userdebug</code> images, and part of the Android NDK.</p> <p>In theory, as Android is Linux, it should be possible to use <code>perf</code>. Unfortunately, <code>perf</code> is difficult to build for Android. Fortunately, <code>simpleperf</code> is readily available: it is preinstalled in Android <code>userdebug</code> images, and it is part of the Android NDK.</p> <p>First, we record on the device:</p> <pre><code>adb shell \\\n  simpleperf record -e raw-l1d-cache-refill -o /data/local/tmp/perf.data \\\n    /data/local/tmp/iree-benchmark-module \\\n      --device=local-task \\\n      ... command-line arguments of iree-benchmark-module as usual ...\n</code></pre> <p>Then pull the recorded data from the device, and analyze on the desktop. We assume that <code>${ANDROID_NDK}</code> points to the local copy of the Android NDK.</p> <pre><code>adb pull /data/local/tmp/perf.data /tmp/perf.data\n${ANDROID_NDK}/simpleperf/report.py -i /tmp/perf.data\n</code></pre> <p>This prints a breakdown of <code>raw-l1d-cache-refill</code> events by symbol.</p> <p>Like with <code>perf</code>, a list of event types can be queried by the <code>list</code> subcommand:</p> <pre><code>adb shell simpleperf list\n</code></pre>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#no-support-for-annotate-by-cpu-event","title":"No support for <code>annotate</code> by CPU event","text":"<p>There is no <code>simpleperf annotate</code>. 
The <code>simpleperf</code> documentation lists a couple of ways of achieving the same thing.</p> <p>However:</p> <ul> <li>The common case of annotating by time, as opposed to annotating by CPU event,   is supported by Tracy.</li> <li>Annotating by CPU event inherently does not work due to hardware limitations   of the ARM CPUs found in Android devices. That is, the hardware is too   imprecise at pinning an event to a particular instruction.</li> </ul>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#interpreting-cpu-event-counts","title":"Interpreting CPU event counts","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#problems","title":"Problems","text":"<p>There are multiple layers of complexity in interpreting CPU event counts.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-in-themselves-normal","title":"These events are in themselves normal","text":"<p>The first difficulty is in the fact that most of these events are normal. So just knowing that they happened is not in itself actionable.</p> <p>For example, if we learn that some code causes cache misses, that isn't big news: so does all code. Maybe this code has too many cache misses, but how many is too many? Maybe this code alone accounts for a large fraction of the overall total of the whole program, but maybe even that is normal, for instance if the code being studied is the 'hot' part of the program where a large fraction of overall time is spent?</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-hardware-dependent-and-under-documented","title":"These events are hardware-dependent and under-documented","text":"<p>Many of these events have a meaning that varies between CPUs and that is difficult to characterize on any CPU, let alone in a way that applies to all CPUs.</p> <p>For example, take the \"L2 data cache refill\". 
On ARM, with <code>simpleperf</code>, that would be <code>raw-l2d-cache-refill</code>. Questions:</p> <ul> <li>Is \u201cL2\u201d inclusive of   \u201cL1\u201d?</li> <li>How many bytes are transferred per \u201crefill\u201d?</li> <li>Are accesses induced by speculative execution or by automatic pre-fetching   counted in the same way as accesses induced by actual code execution?</li> </ul> <p>The answers to all of the above questions are CPU-dependent. They may even vary between the CPU cores of the same Android device.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-imprecise-and-noisy-particularly-on-arm-cpus","title":"These events are imprecise and noisy, particularly on ARM CPUs","text":"<p>Expect noise levels above 10% in many CPU event counts on ARM CPUs. Moreover, on ARM, as discussed above, there is inaccuracy in which instruction is blamed for which event, which will increase inaccuracy of per-symbol breakdowns for very cheap symbols (and makes <code>perf annotate</code> impossible as noted above). 
Finally, be aware that some ARM CPUs may perform event count interpolation, so we may not have any access to true hardware counts.</p>","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#recommendations","title":"Recommendations","text":"<p>Here is a workflow pattern that makes it possible to make significant use of CPU event counts, despite all the problems noted above:</p> <ul> <li>Hypothesize that some code diff might help performance, and might help   reduce the number of CPU events of a certain type, and that the two might be   related.</li> <li>Benchmark with and without the code diff, on the same device, everything else   being equal.<ul> <li>Let your benchmark perform a fixed number of iterations, or, if using a benchmark termination condition of the form \"run until at least N seconds have elapsed\", carefully divide event counts by the actual number of iterations that were run.</li> </ul> </li> <li>If the observed CPU event count difference is significant, go ahead and claim   that your code diff probably helps with that aspect of CPU behavior.</li> </ul> <p>Some things NOT to be done:</p> <ul> <li>Don\u2019t try to compare different metrics, not even when it seems obvious that   they should satisfy a simple relationship, not even on the same CPU (e.g. \u201cL1   accesses should be greater than L2 accesses\u201d).</li> <li>Don\u2019t divide by some \u201ctotal\u201d metric to get some kind of ratio. For example,   don\u2019t try to compute a \u201ccache miss ratio\u201d as quotient of \u201ccache refill\u201d over   \u201call cache accesses\u201d metrics. The first problem with that (even before we get   to CPU-specific issues) is that that\u2019s rewarding increases to the \u201call cache   accesses\u201d metrics, so if something bad happens in your codegen and your kernel   ends up spilling a lot of registers to the stack, that\u2019s going to be a lot more   accesses which will all be L1 hits so that\u2019ll help this ratio look better!  
So   more generally, just try to minimize some CPU metrics (that count \u201ccostly\u201d   events), not some more complex math expression formed from arithmetic on CPU   metrics.</li> </ul>","tags":["CPU"]},{"location":"developers/performance/profiling-gpu-vulkan/","title":"Profiling GPUs using Vulkan","text":"<p>Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used.</p> <p>(TODO: add some pictures for each tool)</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#renderdoc","title":"RenderDoc","text":"<p>Support for RenderDoc can be enabled by configuring cmake with <code>-DIREE_ENABLE_RENDERDOC_PROFILING=ON</code>. When built in to IREE the profiling functionality is available for programmatic use via the <code>iree_hal_device_profiling_begin</code> and <code>iree_hal_device_profiling_end</code> APIs.</p> <p>When using one of the standard IREE tools (<code>iree-run-module</code>, <code>iree-benchmark-module</code>, etc) the <code>--device_profiling_mode=queue</code> flag can be passed to enable capture around the entire invocation (be careful when benchmarking as the recordings can be quite large!). 
The default capture file name can be specified with <code>--device_profiling_file=foo.rdc</code>.</p> <p>Capturing in the RenderDoc UI can be done by specifying the IREE tool or embedding application (<code>iree-run-module</code>, etc) as the launch executable and adding all arguments as normal.</p> <p>Capturing from the command line can be done using <code>renderdoccmd</code> with the specified file appearing (by default) in the executable directory:</p> <pre><code>renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=foo.rdc ...\nstat tools/foo.rdc\nrenderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=/some/path/foo.rdc ...\nstat /some/path/foo.rdc\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#android-gpus","title":"Android GPUs","text":"<p>There are multiple GPU vendors for the Android platforms, each offering their own tools. Android GPU Inspector (AGI) provides a cross-vendor solution. See the documentation for more details.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#desktop-gpus","title":"Desktop GPUs","text":"<p>Vulkan supports both graphics and compute, but most tools in the Vulkan ecosystem focus on graphics. As a result, some Vulkan profiling tools expect commands to correspond to a sequence of frames presented to displays via framebuffers. This means additional steps for IREE and other Vulkan applications that solely rely on headless compute. For graphics-focused tools, we need to wrap IREE's logic inside a dummy rendering loop in order to provide the necessary markers for these tools to perform capture and analysis.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#amd","title":"AMD","text":"<p>For AMD GPUs, Radeon GPU Profiler (RGP) is the tool to understand fine details of how IREE GPU performs. 
See the documentation for details.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#nvidia","title":"NVIDIA","text":"<p>For NVIDIA GPUs, NVIDIA Nsight Graphics is the tool to understand fine details of how IREE GPU performs. See the documentation for details.</p>","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-with-tracy/","title":"Profiling with Tracy","text":""},{"location":"developers/performance/profiling-with-tracy/#overview","title":"Overview","text":"<p>Tracy is a hybrid instrumentation and sampling profiler that IREE uses for performance analysis.</p> <p></p>"},{"location":"developers/performance/profiling-with-tracy/#instrumentation-and-sampling","title":"Instrumentation and sampling","text":"<ul> <li> <p>Instrumentation is generic code built into the program being profiled,     recording zone start and end timestamps where a developer requests them:</p> <p></p> <p>Most of IREE's runtime code is instrumented using the macros defined in iree/base/tracing.h:</p> <pre><code>void iree_sample_function() {\n  IREE_TRACE_ZONE_BEGIN(z0);\n  // All code here will be included in the zone for `iree_sample_function`.\n  IREE_TRACE_ZONE_END(z0);\n}\n</code></pre> </li> <li> <p>Sampling collects program state and information about the machine using     platform-specific APIs at a regular sampling frequency. Sampled data     includes callstacks, hardware counters, and more:</p> <p></p> <p>While recording instrumentation data requires no special setup, recording sampling data will need some configuration depending on your operating system. Refer to the \"Automated data collection\" section in the Tracy PDF manual for full details. 
Generally, sampling needs:</p> <ul> <li>Debug information from <code>-DCMAKE_BUILD_TYPE=RelWithDebInfo</code> or <code>Debug</code></li> <li>Privilege elevation from <code>sudo</code> on Unix or administrator on Windows</li> </ul> </li> </ul>"},{"location":"developers/performance/profiling-with-tracy/#remote-or-embedded-telemetry","title":"Remote or embedded telemetry","text":"<p>Tracy uses a client-server model with communication over a TCP socket:</p> <ul> <li>The \"client\" is the program being profiled.</li> <li>The \"server\" is either the Tracy profiler UI or the Tracy command-line   capture tool.</li> </ul> <pre><code>graph LR\n  tracyclient[\"Tracy Client\n  e.g. iree-run-module\"]\n  tracyserver[\"Tracy Server\"]\n  network([\"Network\"])\n\n  thread1[\"Thread 1\"] --&gt; tracyclient\n  thread2[\"Thread 2\"] --&gt; tracyclient\n  thread3[\"Thread 3\"] --&gt; tracyclient\n\n  tracyclient --&gt; network\n  network --&gt; tracyserver\n\n  tracyserver --&gt; display[\"Display\"]\n  tracyserver --&gt; storage[\"Storage\"]</code></pre> <p>This allows for remote capture, such as over SSH, as well as sharing of saved traces across machines.</p>"},{"location":"developers/performance/profiling-with-tracy/#the-tracy-manual","title":"The Tracy manual","text":"<p>The primary source of Tracy documentation, including how to build the profiler UI and CLI capture tool, is a PDF manual:</p> <p>Download tracy.pdf  View tracy.pdf in browser </p>"},{"location":"developers/performance/profiling-with-tracy/#capturing-a-trace","title":"Capturing a trace","text":"<p>You will need three things to capture a trace:</p> <ol> <li>The Tracy profiler UI (or CLI capture tool)</li> <li>A binary tool to trace, such as <code>iree-run-module</code>, built with tracing     support enabled</li> <li>A program to profile, e.g. 
a <code>.vmfb</code> file with parameters and input values</li> </ol> <p>The Tracy tools can either be downloaded from the official releases or they can be built from source by using either the upstream CMake build or IREE's downstream CMake build.</p>"},{"location":"developers/performance/profiling-with-tracy/#quickstart","title":"Quickstart","text":"<ol> <li> <p>Build <code>iree-run-module</code> (or other tools like <code>iree-benchmark-module</code>) with     tracing support:</p> <pre><code># Sampling needs debug info from the `RelWithDebInfo` or `Debug` build type.\n\ncmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_RUNTIME_TRACING=ON\ncmake --build ../iree-build/ --target iree-run-module\n</code></pre> <p>For more information about building from source, follow the Getting started page.</p> Tip - Instrumented Python packages <p>The <code>iree-runtime</code> Python package includes prebuilt instrumented tools. Set the <code>IREE_PY_RUNTIME=tracy</code> environment variable to use them:</p> <pre><code>python -m pip install iree-runtime\nIREE_PY_RUNTIME=tracy iree-run-module ...\n</code></pre> <p>You should see the following message printed to stderr:</p> <p><code>-- Using Tracy runtime (IREE_PY_RUNTIME=tracy)</code></p> <p>See this section in the Python bindings documentation for more details.</p> </li> <li> <p>Compile a program to profile:</p> <pre><code># The --iree-hal-executable-debug-level=3 flag embeds source information\n# about each executable into the .vmfb file for the runtime to pass to\n# Tracy. 
Without this flag, source locations are included on a best-effort\n# basis, typically coming from the input .mlir or .py file.\n\niree-compile program_input.mlir \\\n  --iree-hal-target-backends={target} \\\n  --iree-hal-executable-debug-level=3 \\\n  -o program.vmfb\n</code></pre> </li> <li> <p>Run the program using the instrumented <code>iree-run-module</code>:</p> <pre><code># Set the TRACY_NO_EXIT environment variable to keep short-running programs\n# from exiting before connecting.\n#\n# Some platforms need elevated permissions (root / sudo / administrator)\n# to collect sampling data using kernel facilities. If you only want to\n# collect instrumentation data or your platform does not require it, you\n# can run with more limited permissions.\n\nTRACY_NO_EXIT=1 sudo iree-run-module \\\n  --module=program.vmfb \\\n  --device={device} \\\n  --entry_function={entry} \\\n  --parameters={parameters} \\\n  --input={arg0} \\\n  --input={arg1} \\\n  ...\n</code></pre> </li> <li> <p>While the program is running, connect using the Tracy profiler UI or capture     tool:</p> Tracy profiler UITracy capture tool <p>The profiler UI lists available clients or can be set to connect to the next instrumented process:</p> <p></p> <p>The capture tool can be used programmatically and over SSH:</p> <pre><code>$ capture -o /tmp/capture.tracy\n\nConnecting to 127.0.0.1:8086...\n</code></pre> </li> <li> <p>View the captured trace once it finishes collecting events. 
Traces captured     by the profiler UI can also be saved to <code>.tracy</code> files for sharing and     archival.</p> </li> </ol>"},{"location":"developers/performance/profiling-with-tracy/#including-more-information-in-traces","title":"Including more information in traces","text":""},{"location":"developers/performance/profiling-with-tracy/#changing-iree_tracing_mode","title":"Changing <code>IREE_TRACING_MODE</code>","text":"<p>Set IREE's <code>IREE_TRACING_MODE</code> value (defined in iree/base/tracing.h) to adjust which tracing features are enabled. Each feature adds tracing overhead and increases the size of trace files, so adjust this setting with care.</p> <p>For example, to track memory allocations with callstacks:</p> <pre><code>cmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_RUNTIME_TRACING=ON \\\n    -DIREE_TRACING_MODE=4\ncmake --build ../iree-build/ --target iree-run-module\n</code></pre> <p>The Memory window in the Tracy profiler should then show callstacks for each allocation:</p> <p></p>"},{"location":"developers/performance/profiling-with-tracy/#options-for-the-llvm-cpu-backend","title":"Options for the <code>llvm-cpu</code> backend","text":"<p>When using the <code>llvm-cpu</code> backend (<code>--iree-hal-target-backends=llvm-cpu</code> with <code>--device=local-task</code> or <code>--device=local-sync</code>), these options are available:</p> <ul> <li> <p>The <code>--iree-llvmcpu-link-embedded=false</code> flag uses the \"system\" linker     (.so/.dylib/.dll) instead of the generic     \"embedded\" ELF linker, allowing Tracy to look more deeply at generated code:</p> <p></p> </li> <li> <p>The <code>IREE_PRESERVE_DYLIB_TEMP_FILES</code> environment variable can be used on     Posix platforms to ensure that Tracy can view IREE's generated native code.</p> </li> <li> <p>Ensure that <code>--iree-llvmcpu-debug-symbols=true</code> is set (it is by default).</p> </li> </ul> <p>Putting those flags 
and environment variables together in an example:</p> <pre><code>iree-compile program_input.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  --iree-hal-executable-debug-level=3 \\\n  --iree-llvmcpu-link-embedded=false \\\n  --iree-llvmcpu-debug-symbols=true \\\n  -o program_full_info.vmfb\n\nTRACY_NO_EXIT=1 IREE_PRESERVE_DYLIB_TEMP_FILES=1 sudo iree-run-module \\\n  --device=local-task \\\n  --module=program_full_info.vmfb \\\n  ...\n</code></pre>"},{"location":"developers/performance/profiling-with-tracy/#remote-capture-eg-ssh-android","title":"Remote capture (e.g. SSH, Android)","text":"<p>Tracy's client/server connection uses TCP port 8086 by default. If the Tracy-instrumented program is running on a separate machine, this port needs to be forwarded.</p> <p>In particular, when profiling on Android, this is needed:</p> <pre><code>adb forward tcp:8086 tcp:8086\n</code></pre> <p>You can also pass <code>-p &lt;port&gt;</code> to the capture tool to override the default port to connect to, or use the Tracy GUI which scans other ports too.</p>"},{"location":"developers/performance/profiling-with-tracy/#touring-the-tracy-profiler-ui","title":"Touring the Tracy profiler UI","text":"<p>The initial view should look like this:</p> <p></p> <p>Before going further, take a second to check that your recorded profile data has all the data that it should have. Permissions issues could cause it to lack \"sampling\" or \"CPU data\" information. For example, here is what the initial view looks like when one forgot to run the profiled program as root on Desktop Linux (where running as root is required):</p> <p></p> <p>Notice how the latter screenshot is lacking the following elements:</p> <ul> <li>No 'CPU data' header in the top left, with the list of all CPU cores.</li> <li>No 'ghost' icon next to the 'Main thread' header.</li> </ul> <p>Click the 'Statistics' button at the top. 
It will open a window like this:</p> <p></p> <p>See how the above screenshot has two radio buttons at the top: 'Instrumentation' and 'Sampling'. At this point, if you don't see the 'Sampling' radio button, you need to resolve that first, as discussed above about possible permissions issues.</p> <p>These 'Instrumentation' and 'Sampling' statistics correspond to the two kinds of data that Tracy collects about your program. In the Tracy main view, they correspond, respectively, to 'instrumentation' and 'ghost' zones. Refer to the Tracy PDF manual for a general introduction to these concepts. For each thread, the ghost icon toggles the view between these two kinds of zones.</p> <p>Back to the main view, look for the part of the timeline that is of interest to you. Your area of interest might not be on the Main thread. In fact, it might be on a thread that's not visible in the initial view at all. To pan around with the mouse, hold the right mouse button down (or its keyboard equivalent on macOS). Alternatively, look for the 'Frame' control at the top of the Tracy window. Use the 'next frame' arrow button until more interesting threads appear.</p> <p>IREE module code tends to run on a thread whose name contains the word <code>worker</code>.</p> <p>Once you have identified the thread of interest, you typically want to click its ghost icon to view its \"ghost\" (i.e. sampling) zones. Here is what you should get when clicking on a ghost zone:</p> <p></p> <p>The percentages column to the left of the disassembly shows where time is being spent. This is unique to the sampling data (ghost zones) and has no equivalent in the instrumentation data (instrumentation zones). 
Here is what we get clicking on the corresponding instrumentation zone:</p> <p></p> <p>This still has a 'Source' button but that only shows the last C++ caller that had explicit Tracy information, so here we see a file under <code>iree/hal</code> whereas the Ghost zone saw into the IREE compiled module that that calls into, with the source view pointing to the <code>.mlir</code> file.</p>"},{"location":"developers/performance/profiling-with-tracy/#tracing-iree-compile","title":"Tracing <code>iree-compile</code>","text":"<p>Tracing <code>iree-compile</code> is much like tracing the runtime tools, except that both of these options need to be set with CMake: <code>-DIREE_ENABLE_RUNTIME_TRACING=ON -DIREE_ENABLE_COMPILER_TRACING=ON</code>:</p> <pre><code>cmake -G Ninja -B ../iree-build/ -S . \\\n    -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n    -DIREE_ENABLE_RUNTIME_TRACING=ON \\\n    -DIREE_ENABLE_COMPILER_TRACING=ON\ncmake --build ../iree-build/ --target iree-compile\n</code></pre> <p>The steps for collecting traces are the same: run the instrumented program and connect using the Tracy profiler UI or capture tool.</p> <p></p> <ul> <li>MLIR passes are instrumented using   Pass Instrumentation,   (see   <code>TracingUtils.h</code>)</li> <li>Zones are annotated with op breadcrumbs indicating which root op was processed</li> <li>Each compilation phase (e.g. Flow, Stream, HAL) is tagged as a \"frame\", so   you can jump between them, limit statistics to them, and see how much time   each took</li> </ul> Caution - Tracy sampling with <code>iree-compile</code> <p>When tracing the compiler, the LLVM/MLIR code can easily generate millions of trace events. 
Traces captured with sampling can thus take hours to collect, require 40GB+ of RAM to view, and take 1GB+ on disk to store.</p> <p></p> <p>However, sampling is especially useful in diagnosing long compile times, since only the MLIR passes are instrumented, unlike in IREE's runtime where most functions are covered.</p> <p>For more tips on profiling the compiler, see the Compile time regression debugging page.</p>"},{"location":"developers/performance/profiling-with-tracy/#troubleshooting","title":"Troubleshooting","text":""},{"location":"developers/performance/profiling-with-tracy/#resource_exhausted-failed-to-open-file-issue","title":"\"RESOURCE_EXHAUSTED; failed to open file\" issue","text":"<p>This is a known issue with how Tracy operates. One way to work around it is to manually increase the total number of files that can be kept open simultaneously and run the command with that setting:</p> <pre><code>sudo sh -c \"ulimit -n &lt;bigNum&gt; &amp;&amp; &lt;myTracyInstrumentedProgram&gt;\"\n</code></pre> <p>Info</p> <p>Tracy keeps a number of file descriptors open that, depending on the machine and its settings, may exceed the limit allowed by the system resulting in IREE failing to open more files. In particular, it is common to have a relatively low limit when running with <code>sudo</code>.</p>"},{"location":"developers/performance/profiling-with-tracy/#appendix","title":"Appendix","text":""},{"location":"developers/performance/profiling-with-tracy/#building-tracy-from-source","title":"Building Tracy from source","text":""},{"location":"developers/performance/profiling-with-tracy/#install-dependencies","title":"Install dependencies","text":""},{"location":"developers/performance/profiling-with-tracy/#do-you-need-capstone-next","title":"Do you need capstone-next?","text":"<p>You can skip this section if you don't need disassembly of CPU code.</p> <p>Capstone is the disassembly framework used by Tracy. 
The default branch, which is what OS packages still distribute, is running a few years behind current CPU architectures.</p> <p>Newer CPU architectures such as RISC-V, or newer extensions of existing architectures (e.g. new SIMD instructions in the ARM architecture) are typically only supported in the <code>next</code> branch. If you need that support, check out and build that branch. Consider uninstalling any OS package for <code>capstone</code> or otherwise ensure that your IREE build will pick up your <code>next</code> branch build.</p>"},{"location":"developers/performance/profiling-with-tracy/#linux","title":"Linux","text":"<p>If you haven't opted to build <code>capstone-next</code> (see above section), install the OS package for <code>capstone</code> now (Debian-based distributions):</p> <pre><code>sudo apt install libcapstone-dev\n</code></pre> <p>Install other dependencies:</p> <pre><code>sudo apt install libtbb-dev libzstd-dev libglfw3-dev libfreetype6-dev libgtk-3-dev\n</code></pre> <p>If you only build the command-line tool <code>iree-tracy-capture</code> and not the graphical <code>iree-tracy-profiler</code>, you can install only:</p> <pre><code>sudo apt install libtbb-dev libzstd-dev\n</code></pre> <p>The zstd version on Ubuntu 18.04 is old. You will need to install it from source from https://github.com/facebook/zstd.git</p>"},{"location":"developers/performance/profiling-with-tracy/#mac","title":"Mac","text":"<p>If you haven't opted to build <code>capstone-next</code> (see above section), install the system <code>capstone</code> now:</p> <pre><code>brew install capstone\n</code></pre> <p>Install other dependencies:</p> <pre><code>brew install pkg-config glfw freetype tbb zstd\n</code></pre>"},{"location":"developers/performance/profiling-with-tracy/#build-the-tracy-tools","title":"Build the Tracy tools","text":"<p>A CMake-based build system for Tracy is maintained as part of IREE. 
In your IREE host build directory, set the following CMake option:</p> <pre><code>cmake -DIREE_BUILD_TRACY=ON -DIREE_ENABLE_LLD=ON .\n</code></pre> <p>That enables building the Tracy server tools, <code>iree-tracy-profiler</code> and <code>iree-tracy-capture</code>, introduced above. It also enables building the tool <code>iree-tracy-csvexport</code> which can be used to export a captured trace as a CSV file (see Section 6 \"Exporting zone statistics to CSV\" in the Tracy manual).</p> <p>TODO - switch to using upstream CMake project</p> <p>Tracy now has an upstream CMake build for each of its components. We may be able to use this directly.</p> <p>If profiling on Android/ARM, you might need the patch discussed in the next paragraph.</p> <p>Consider building without assertions (<code>cmake -DIREE_ENABLE_ASSERTIONS=OFF</code>). At least <code>iree-tracy-profiler</code> has some faulty assertions that can cause the profiler UI to crash during normal usage.</p> <p>Rebuild, either everything or just these specific targets:</p> <pre><code>cmake --build . --target iree-tracy-profiler iree-tracy-capture iree-tracy-csvexport\n</code></pre> <p>This should have created the <code>iree-tracy-profiler</code>, <code>iree-tracy-capture</code>, and <code>iree-tracy-csvexport</code> binaries:</p> <pre><code>$ find . -name iree-tracy-*\n./tracy/iree-tracy-profiler\n./tracy/iree-tracy-capture\n./tracy/iree-tracy-csvexport\n</code></pre>"},{"location":"developers/performance/profiling-with-tracy/#android-system-settings-required-for-sampling-and-systrace","title":"Android system settings required for Sampling and SysTrace","text":"<p>When profiling on an Android device, in order to get the most useful information in the trace, tweak system permissions as follows before profiling. 
This needs to be done again after every reboot of the Android device.</p> <p>From your desktop, get a shell on the Android device:</p> <pre><code>adb shell\n</code></pre> <p>The following commands are meant to be run from that Android device shell. First, get root access:</p> <pre><code>su\n</code></pre> <p>Now run the following commands as root on the Android device:</p> <pre><code>setenforce 0\nmount -o remount,hidepid=0 /proc\necho 0 &gt; /proc/sys/kernel/perf_event_paranoid\necho 0 &gt; /proc/sys/kernel/kptr_restrict\n</code></pre> <p>Note: in order for this to work, the device needs to be rooted, which means that the above <code>su</code> command must succeed. This is sometimes confused with the <code>adb root</code> command, but that's not the same. <code>adb root</code> restarts the <code>adbd</code> daemon as root, which causes device shells to be root shells by default. This is unnecessary here and we don't recommend it: real Android applications never run as root, so Tracy/Android has to support running benchmarks as regular user and it's best to stick to this for the sake of realistic benchmarks. Internally, Tracy executes <code>su</code> commands to perform certain actions, so it too relies on the device being rooted without relying on the benchmark process being run as root.</p>"},{"location":"developers/performance/profiling/","title":"Profiling overview","text":"<p>IREE benchmarking gives us an accurate and reproducible view of program performance at specific levels of granularity. To analyze system behavior in more depth, there are various ways to profile IREE.</p>"},{"location":"developers/performance/profiling/#cpu-cache-and-other-cpu-event-profiling","title":"CPU cache and other CPU event profiling","text":"<p>For some advanced CPU profiling needs such as querying CPU cache and other events, one may need to use some OS-specific profilers. 
See Profiling CPUs.</p>"},{"location":"developers/performance/profiling/#vulkan-gpu-profiling","title":"Vulkan GPU Profiling","text":"<p>Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used. Refer to Profiling GPUs using Vulkan.</p>"},{"location":"developers/performance/profiling/#tracy","title":"Tracy","text":"<p>Tracy is a profiler that's been used for a wide range of profiling tasks on IREE. Refer to Profiling with Tracy.</p>"},{"location":"guides/","title":"Guides","text":""},{"location":"guides/#ml-frameworks","title":"ML frameworks","text":"<p>Start here: ML frameworks overview</p> <p>Guides for specific frameworks:</p> <ul> <li> TensorFlow and    TensorFlow Lite</li> <li> JAX</li> <li> PyTorch</li> </ul>"},{"location":"guides/#deployment-configurations","title":"Deployment configurations","text":"<p>Start here: Deployment configurations overview</p> <p>Guides for specific configurations:</p> <ul> <li> CPU for general   purpose CPU deployment</li> <li> CPU - Bare-Metal   with minimal platform dependencies</li> <li> GPU - Vulkan   for cross-platform usage and interop with graphics applications</li> <li> GPU - CUDA   for NVIDIA-specific solutions</li> <li> GPU - ROCm   for AMD-specific solutions</li> <li> GPU - Metal   for running on Apple hardware</li> </ul>"},{"location":"guides/#general-topics","title":"General topics","text":"<ul> <li> Parameters for managing   large chunks of program data</li> </ul>"},{"location":"guides/parameters/","title":"Parameters","text":""},{"location":"guides/parameters/#overview","title":"Overview","text":"<p>Parameters in IREE are externalized storage for resources that are asynchronously accessible and device-aware. 
Parameters offer efficient ways to store, manipulate, and load data for large resources like the weights in a machine learning model.</p> <p>Without using parameters, compiled programs include both code and data:</p> <pre><code>graph LR\n  accTitle: .vmfb file without using parameters\n  accDescr {\n    Without using parameters, .vmfb files contain host code, device code,\n    small data, and large resources all in the same file.\n  }\n\n  subgraph VMFB[\".vmfb file\"]\n    HostCode(Host code)\n    DeviceCode(Device code)\n    SmallData(Small data)\n    LargeResources(Large resources)\n  end</code></pre> <p>Using parameters, data can be stored, transmitted, and loaded from separate sources:</p> <pre><code>graph BT\n  accTitle: .vmfb file using parameters\n  accDescr {\n    Using parameters, .vmfb files contain host code, device code, small\n    constants, and parameters. External .irpa, .safetensors, and .gguf files\n    can be linked to these parameters.\n  }\n\n  subgraph VMFB[\".vmfb file using parameters\"]\n    HostCode(Host code)\n    DeviceCode(Device code)\n    SmallData(Small data)\n    Parameters(\"Parameters\n    \u2022 scope_1::key_1\n    \u2022 scope_1::key_2\n    \u2022 scope_2::key_1\n    \u2022 scope_2::key_2\")\n  end\n\n  subgraph IRPA[\".irpa file\"]\n    key_1\n    key_2\n  end\n\n  subgraph Safetensors[\".safetensors file\"]\n    key_1a[key_1]\n  end\n\n  subgraph GGUF[\".gguf file\"]\n    key_2a[key_2]\n  end\n\n  IRPA -. \"scope_1\" .-&gt; Parameters\n  Safetensors -. \"scope_2\" .-&gt; Parameters\n  GGUF -. \"scope_2\" .-&gt; Parameters</code></pre> <p>Note</p> <p>Notice that parameters are identified by a scope and a unique key within that scope, not strong references to specific file paths. 
Data from any supported file format or \"parameter index provider\" can be loaded.</p>"},{"location":"guides/parameters/#supported-formats","title":"Supported formats","text":""},{"location":"guides/parameters/#irpa","title":"IRPA","text":"<p>The IREE Parameter Archive (IRPA) file format (<code>iree/schemas/parameter_archive.h</code>) is IREE's own format optimized for deployment. Formats like GGUF and safetensors can be converted to IRPA.</p> <ul> <li>Data is always aligned in IRPA files for efficient loading</li> <li>IRPA files contain minimal metadata and are fully hermetic. Buffers are   stored as opaque byte range blobs, not as tensors with explicit types and   shapes</li> <li>For testing and benchmarking workflows, IRPA files may include a mix of real   data and splatted values (repeating patterns with no storage requirements on   disk)</li> </ul>"},{"location":"guides/parameters/#gguf","title":"GGUF","text":"<p>The GGUF format is used by the GGML project and other projects in that ecosystem like llama.cpp.</p> <ul> <li>GGUF files are non-hermetic - using them requires knowledge about the settings   used to compile GGML in order to interpret the contents of each file   (particularly for various quantization formats)</li> <li>GGUF files are aligned, so they should have matching performance with IRPA   files</li> </ul>"},{"location":"guides/parameters/#safetensors","title":"Safetensors","text":"<p>The safetensors format is used by the Hugging Face community.</p> <ul> <li>Safetensors files are not naturally aligned to support efficient loading, so   using them across runtime devices comes with (possibly severe) performance   penalties</li> </ul>"},{"location":"guides/parameters/#extensibility-and-other-formats","title":"Extensibility and other formats","text":"<p>The core IREE tools are written in C and aim to be simple and pragmatic, with minimal dependencies. 
Other formats could be converted into supported file types:</p> <ul> <li>PyTorch <code>.pt</code> and <code>.pth</code> files (serialized state dictionaries produced with   <code>torch.save</code>)</li> <li>TensorFlow checkpoint (<code>.ckpt</code>, <code>.h5</code>) files or SavedModel /   <code>model.keras</code>   archives (see the TensorFlow guide)</li> </ul> <p>In-tree formats for file-backed parameters are defined in the <code>iree/io/formats/</code> folder. Additional formats could be defined out-of-tree to make use of external libraries as needed.</p> <p>Parameter loading from memory (or a cache, or some other location) is possible by adding new providers implementing <code>iree_io_parameter_provider_t</code>. The default parameter index provider operates on files on local disk.</p>"},{"location":"guides/parameters/#working-with-parameter-files","title":"Working with parameter files","text":""},{"location":"guides/parameters/#creating-parameter-files","title":"Creating parameter files","text":"<p>The <code>iree-create-parameters</code> tool can create IREE Parameter Archive (.irpa) files. Each parameter in the archive can be created with either a real data value (taking up storage space in the final archive) or a splatted value (zeroed contents or a repeated value, taking up no storage space on disk).</p> Tip: <code>--help</code> output <p>For a detailed list of options, pass <code>--help</code>:</p> <pre><code>$ iree-create-parameters --help\n\n# ============================================================================\n# \ud83d\udc7b IREE: iree-create-parameters\n# ============================================================================\n\nCreates IREE Parameter Archive (.irpa) files. 
Provide zero or more\nparameter value declarations and an output file with\n`--output=file.irpa` to produce a new file with zeroed or patterned\ncontents.\n\n...\n</code></pre> <ul> <li> <p>Example creating a file with two zeroed embedded parameters and one with a   repeating pattern:</p> <pre><code>$ iree-create-parameters \\\n    --data=my.zeroed_param_1=4096xf32 \\\n    --data=my.zeroed_param_2=2x4096xi16 \\\n    --data=my.pattern_param_2=8x2xf32=2.1 \\\n    --output=output_with_storage.irpa\n</code></pre> </li> <li> <p>Example creating a file with splatted values (no storage on disk):</p> <pre><code>$ iree-create-parameters \\\n    --splat=my.splat_param_1=4096xf32=4.1 \\\n    --splat=my.splat_param_2=2x4096xi16=123 \\\n    --output=output_without_storage.irpa\n</code></pre> </li> </ul> <p>Parameter archives can also be created using IREE's Python bindings:</p> <pre><code>import iree.runtime as rt\nimport numpy as np\n\nparameter_index = rt.ParameterIndex()\nparameter_index.add_buffer(\"weight\", np.zeros([32, 16]) + 2.0)\nparameter_index.add_buffer(\"bias\", np.zeros([32, 16]) + 0.5)\nparameter_index.create_archive_file(\"parameters.irpa\")\n</code></pre> <p>See the <code>runtime/bindings/python/tests/io_test.py</code> file for more usage examples.</p>"},{"location":"guides/parameters/#converting-to-the-irpa-format","title":"Converting to the IRPA format","text":"<p>The <code>iree-convert-parameters</code> tool converts supported files into IREE Parameter Archives (.irpa) files.</p> Tip: <code>--help</code> output <p>For a detailed list of options, pass <code>--help</code>:</p> <pre><code>$ iree-convert-parameters --help\n\n# ============================================================================\n# \ud83d\udc7b IREE: iree-convert-parameters\n# ============================================================================\n\nConverts supported parameter file formats into IREE Parameter Archives\n(.irpa) files. 
Provide one or more input parameter files in the same\nform as expected by the iree-run-module tool (`--parameters=foo.gguf`)\nand an output file with `--output=file.irpa`.\n\n...\n</code></pre> <ul> <li> <p>Example converting from safetensors to IRPA:</p> <pre><code>$ iree-convert-parameters \\\n    --parameters=input.safetensors \\\n    --output=output.irpa\n</code></pre> </li> <li> <p>Example mutating parameters:</p> <pre><code>$ iree-convert-parameters \\\n    --parameters=a.gguf \\\n    --parameters=b.safetensors \\\n    --exclude=unneeded_param \\\n    --rename=old_name=new_name \\\n    --splat=some_name=f32=4.2 \\\n    --output=ab.irpa\n</code></pre> </li> <li> <p>Example stripping parameters and replacing them with zeros except for one   with special handling:</p> <pre><code>$ iree-convert-parameters \\\n    --parameters=input.irpa \\\n    --strip \\\n    --splat=special_param=f32=1.0 \\\n    --output=output.irpa\n</code></pre> </li> </ul>"},{"location":"guides/parameters/#inspecting-parameter-files","title":"Inspecting parameter files","text":"<p>The <code>iree-dump-parameters</code> tool outputs information about parsed parameter files.</p> Tip: <code>--help</code> output <p>For a detailed list of options, pass <code>--help</code>:</p> <pre><code>$ iree-dump-parameters --help\n\n# ============================================================================\n# \ud83d\udc7b IREE: iree-dump-parameters\n# ============================================================================\n\nDumps information about parsed parameter files.\n\n...\n</code></pre> <ul> <li> <p>Example listing all available parameters and their index information:</p> <pre><code>$ iree-dump-parameters \\\n    --parameters=my_scope=my_file.gguf \\\n    [--parameters=...]\n</code></pre> </li> <li> <p>Example extracting parameter binary contents from a file:</p> <pre><code>$ iree-dump-parameters ... 
\\\n    --extract=scope::key0=file0.bin \\\n    [--extract=...]\n</code></pre> </li> </ul>"},{"location":"guides/parameters/#loading-parameters-from-files","title":"Loading parameters from files","text":""},{"location":"guides/parameters/#on-the-command-line","title":"On the command line","text":"<p>IREE command line tooling can load parameter files alongside module files:</p> <pre><code>iree-run-module --module=program.vmfb --parameters=data.irpa ...\n</code></pre> <p>For concrete examples, see these test files:</p> <ul> <li><code>tools/test/parameters_scoped.mlir</code></li> <li><code>tools/test/parameters_unscoped.mlir</code></li> </ul>"},{"location":"guides/parameters/#from-python","title":"From Python","text":"<p>See the <code>runtime/bindings/python/tests/io_runtime_test.py</code> file for usage examples.</p>"},{"location":"guides/parameters/#using-the-c-api","title":"Using the C API","text":"<p>TODO: <code>iree_io_parameters_module_create()</code> sample code</p>"},{"location":"guides/deployment-configurations/","title":"Deployment configurations","text":"<p>IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE to load programs on demand and to take advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.</p>"},{"location":"guides/deployment-configurations/#stable-configurations","title":"Stable configurations","text":"<ul> <li> CPU for general   purpose CPU deployment</li> <li> CPU - Bare-Metal   with minimal platform dependencies</li> <li> GPU - Vulkan   for cross-platform usage and interop with graphics applications</li> <li> GPU - CUDA   for NVIDIA-specific solutions</li> <li> GPU - ROCm   for AMD-specific solutions</li> <li> GPU - Metal   for running on Apple hardware</li> </ul> <p>These are just the most stable configurations IREE supports. 
Feel free to reach out on any of IREE's communication channels if you have questions about a specific platform, hardware accelerator, or set of system features.</p>"},{"location":"guides/deployment-configurations/#compiler-target-backends","title":"Compiler target backends","text":"<p>Compiler target backends are used to generate executable code for hardware APIs and device architectures. Compiler targets may implement special optimizations or generate distinct code for certain device/architecture/performance profiles.</p> <p>When compiling programs, a list of target backends must be specified via</p> <ul> <li><code>--iree-hal-target-backends=</code> (command-line)</li> <li><code>target_backends=[...]</code> (Python)</li> </ul> Target backend Description Compatible HAL devices <code>llvm-cpu</code> Code generation for CPU-like devices supported by LLVM <code>local-sync</code>, <code>local-task</code> <code>vmvx</code> Portable interpreter powered by a microkernel library <code>local-sync</code>, <code>local-task</code> <code>vulkan-spirv</code> Portable GPU support via SPIR-V for Vulkan <code>vulkan</code> <code>cuda</code> NVIDIA GPU support via PTX for CUDA <code>cuda</code> <code>metal-spirv</code> GPU support on Apple platforms via MSL for Metal <code>metal</code> <code>rocm</code> Experimental  AMD GPU support via HSACO for ROCm <code>rocm</code> <code>webgpu-spirv</code> Experimental  GPU support on the Web via WGSL for WebGPU <code>webgpu</code> <p>Tip - listing available backends</p> <p>The list of compiler target backends can be queried:</p> Command-linePython bindings <pre><code>$ iree-compile --iree-hal-list-target-backends\n\nRegistered target backends:\n    cuda\n    llvm-cpu\n    metal\n    metal-spirv\n    rocm\n    vmvx\n    vmvx-inline\n    vulkan\n    vulkan-spirv\n</code></pre> <pre><code>iree.compiler.query_available_targets()\n\n['cuda',\n 'llvm-cpu',\n 'metal',\n 'metal-spirv',\n 'rocm',\n 'vmvx',\n 'vmvx-inline',\n 'vulkan',\n 
'vulkan-spirv']\n</code></pre>"},{"location":"guides/deployment-configurations/#runtime-hal-driversdevices","title":"Runtime HAL drivers/devices","text":"<p>Runtime HAL devices call into hardware APIs to load and run executable code. Devices may use multithreading or other system resources, depending on their focus and the build configuration.</p> HAL device Description <code>local-sync</code> Synchronous local CPU device with inline execution <code>local-task</code> Multithreaded local CPU device using a 'task' executor <code>vulkan</code> Portable GPU execution using the Vulkan API <code>cuda</code> NVIDIA GPU execution using CUDA <code>metal</code> GPU execution on Apple platforms using Metal <code>rocm</code> Experimental  AMD GPU execution using ROCm <code>webgpu</code> Experimental  GPU execution on the web using WebGPU <p>Additional HAL drivers can also be defined external to the core project via <code>IREE_EXTERNAL_HAL_DRIVERS</code>.</p>"},{"location":"guides/deployment-configurations/bare-metal/","title":"Running on a bare-metal platform","text":"<p>IREE supports model execution via CPU on bare-metal platforms. 
Bare metal platforms have no operating system support, and executables are built using machine-specific linker scripts and/or board support packages (BSPs).</p> <p>Bare-metal deployment typically uses IREE's LLVM compiler target backend much like the CPU configuration, but using a limited subset of IREE's CPU HAL driver code at runtime to load and execute compiled programs.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#prerequisites","title":"Prerequisites","text":"<p>Out-of-tree bare-metal platform tools and source code for the system should be ready, such as</p> <ul> <li>Compilation toolchain</li> <li>Platform linker script</li> <li>Firmware libraries</li> </ul> <p>Please follow the instructions to retrieve the IREE compiler.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compile-the-model-for-bare-metal","title":"Compile the model for bare-metal","text":"<p>The model can be compiled with the following command:</p> <pre><code>iree-compile \\\n    --iree-stream-partitioning-favor=min-peak-memory \\\n    --iree-hal-target-backends=llvm-cpu \\\n    --iree-llvmcpu-target-triple=x86_64-pc-linux-elf \\\n    --iree-llvmcpu-debug-symbols=false \\\n    samples/models/simple_abs.mlir \\\n    -o /tmp/simple_abs_cpu.vmfb\n</code></pre> <p>In which</p> <ul> <li><code>--iree-stream-partitioning-favor=min-peak-memory</code>: Optimize for minimum peak     memory usage at the cost of concurrency - include when targeting     single-threaded execution to reduce memory consumption.</li> <li><code>--iree-hal-target-backends=llvm-cpu</code>: Compile using the LLVM CPU target</li> <li><code>--iree-llvmcpu-target-triple</code>: Use the <code>&lt;arch&gt;-pc-linux-elf</code> LLVM target triple     so the artifact has a fixed ABI to be rendered by the     elf_module library</li> <li><code>--iree-llvmcpu-debug-symbols=false</code>: To reduce the artifact size</li> </ul> <p>See generate.sh for example command-line instructions 
of some common architectures.</p> <p>You can replace the MLIR file with other MLIR model files, following the instructions.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compiling-the-bare-metal-model-for-static-library-support","title":"Compiling the bare-metal model for static-library support","text":"<p>See the static_library demo sample for an example and instructions on running a model with IREE's <code>static_library_loader</code>.</p> <p>By default, the demo targets the host machine when compiling. To produce a bare-metal compatible model, run <code>iree-compile</code> as in the previous example and add the additional <code>--iree-llvmcpu-static-library-output-path=</code> flag to specify the static library destination. This will produce <code>.h</code> and <code>.o</code> files to link directly into the target application.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#build-bare-metal-runtime-from-source","title":"Build bare-metal runtime from source","text":"<p>A few CMake options and macros should be set to build a subset of IREE runtime libraries compatible with the bare-metal platform. We assume there's no multi-thread control or system library support in the bare-metal system. 
Model execution happens in a single-threaded, synchronous fashion.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#set-cmake-options","title":"Set CMake options","text":"<pre><code># Build the IREE runtime only\nset(IREE_BUILD_COMPILER OFF)\n\n# Tell CMake to skip targeting a specific operating system\nset(CMAKE_SYSTEM_NAME Generic)\n\n# Disable multi-thread library support\nset(IREE_ENABLE_THREADING OFF)\n\n# Only enable the local synchronous HAL driver\nset(IREE_HAL_DRIVER_DEFAULTS OFF)\nset(IREE_HAL_DRIVER_LOCAL_SYNC ON)\n\n# Only enable some executable loaders\nset(IREE_HAL_EXECUTABLE_LOADER_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF ON)\nset(IREE_HAL_EXECUTABLE_LOADER_VMVX_MODULE ON)\n\n# Only enable the embedded ELF executable plugin\nset(IREE_HAL_EXECUTABLE_PLUGIN_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_PLUGIN_EMBEDDED_ELF ON)\n\n# Disable tests until IREE supports running them on bare-metal platforms\nset(IREE_BUILD_TESTS OFF)\n\n# Build samples\nset(IREE_BUILD_SAMPLES ON)\n</code></pre> <p>Todo</p> <p>Clean the list up after #6353 is fixed.</p> <p>Also, set the toolchain-specific CMake file to match the tool path, target architecture, target ABI, linker script, system library path, etc.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#define-iree-macros","title":"Define IREE macros","text":"<p>These macros should be defined, either in C/C++ or via CMake options like</p> <pre><code>set(MY_FLAGS \"-DIREE_PLATFORM_GENERIC=1\")\nset(CMAKE_C_FLAGS ${MY_FLAGS} ${CMAKE_C_FLAGS})\nset(CMAKE_CXX_FLAGS ${MY_FLAGS} ${CMAKE_CXX_FLAGS})\n</code></pre> Macro Description <code>IREE_PLATFORM_GENERIC</code> Let IREE build the runtime library without targeting a specific platform. <code>IREE_SYNCHRONIZATION_DISABLE_UNSAFE=1</code> Disable thread synchronization support. Must only be used if there's a single thread. <code>IREE_FILE_IO_ENABLE=0</code> Disable file I/O. 
<code>IREE_TIME_NOW_FN</code> A function to return the system time. For bare-metal systems, it can be set to <code>IREE_TIME_NOW_FN=\\\"\\{ return 0;\\}\\\"</code> as there's no asynchronous wait handling. <code>IREE_WAIT_UNTIL_FN</code> A function to wait until the given time in nanoseconds. Must match the signature <code>bool(uint64_t nanos)</code> and return false if the wait failed. <p>Examples of how to set up the CMakeLists.txt and .cmake file:</p> <ul> <li>IREE RISC-V toolchain cmake</li> <li>IREE Bare-Metal Arm Sample</li> <li>IREE Bare-Metal RV32 Sample</li> </ul>","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#bare-metal-execution-example","title":"Bare-metal execution example","text":"<p>See the simple_embedding sample for the generic platform to learn how to use the IREE runtime library to build/run the IREE model for the bare-metal target.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/","title":"CPU deployment","text":"<p>IREE supports efficient program execution on CPU devices by using LLVM to compile all dense computations in each program into highly optimized CPU native instruction streams, which are embedded in one of IREE's deployable formats.</p> <p>To compile a program for CPU execution, pick one of IREE's supported executable formats:</p> Executable Format Description embedded ELF portable, high performance dynamic library system library platform-specific dynamic library (.so, .dll, etc.) 
VMVX reference target <p>At runtime, CPU executables can be loaded using one of IREE's CPU HAL drivers:</p> <ul> <li><code>local-task</code>: asynchronous, multithreaded driver built on IREE's \"task\"    system</li> <li><code>local-sync</code>: synchronous, single-threaded driver that executes work inline</li> </ul> <p>Todo</p> <p>Add IREE's CPU support matrix: what architectures are supported; what architectures are well optimized; etc.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core <code>iree-compiler</code> package includes the LLVM-based CPU compiler:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install iree-compiler\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade iree-compiler\n</code></pre> <p>Tip</p> <p><code>iree-compile</code> is installed to your python module installation path. If you pip install with the user mode, it is under <code>${HOME}/.local/bin</code>, or <code>%APPDATA%Python</code> on Windows. 
You may want to include the path in your system's <code>PATH</code> environment variable:</p> <pre><code>export PATH=${HOME}/.local/bin:${PATH}\n</code></pre>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation or iOS cross-compilation page if you are cross compiling for a mobile device. The <code>llvm-cpu</code> compiler backend is compiled in by default on all platforms.</p> <p>Ensure that the <code>IREE_TARGET_BACKEND_LLVM_CPU</code> CMake option is <code>ON</code> when configuring for the host.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>You will need to get an IREE runtime that supports the local CPU HAL driver, along with the appropriate executable loaders for your application.</p> <p>You can check for CPU support by looking for the <code>local-sync</code> and <code>local-task</code> drivers:</p> <pre><code>$ iree-run-module --list_drivers\n\n        cuda: CUDA (dynamic)\n  local-sync: Local execution using a lightweight inline synchronous queue\n  local-task: Local execution using the IREE multithreading task system\n      vulkan: Vulkan 1.x (dynamic)\n</code></pre>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. 
The local CPU HAL drivers are compiled in by default on all platforms.</p> <p>Ensure that the <code>IREE_HAL_DRIVER_LOCAL_TASK</code> and <code>IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF</code> (or other executable loader) CMake options are <code>ON</code> when configuring for the target.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-and-run-a-program","title":"Compile and run a program","text":"<p>With the requirements out of the way, we can now compile a model and run it.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run the following command to compile with the <code>llvm-cpu</code> target:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=llvm-cpu \\\n    mobilenet_iree_input.mlir -o mobilenet_cpu.vmfb\n</code></pre> <p>Tip - CPU targets</p> <p>The <code>--iree-llvmcpu-target-triple</code> flag tells the compiler to generate code for a specific type of CPU. You can see the list of supported targets with <code>iree-compile --iree-llvmcpu-list-targets</code>, or pass \"host\" to let LLVM infer the triple from your host machine (e.g. 
<code>x86_64-linux-gnu</code>).</p> <pre><code>$ iree-compile --iree-llvmcpu-list-targets\n\n  Registered Targets:\n    aarch64    - AArch64 (little endian)\n    aarch64_32 - AArch64 (little endian ILP32)\n    aarch64_be - AArch64 (big endian)\n    arm        - ARM\n    arm64      - ARM64 (little endian)\n    arm64_32   - ARM64 (little endian ILP32)\n    armeb      - ARM (big endian)\n    riscv32    - 32-bit RISC-V\n    riscv64    - 64-bit RISC-V\n    wasm32     - WebAssembly 32-bit\n    wasm64     - WebAssembly 64-bit\n    x86        - 32-bit X86: Pentium-Pro and above\n    x86-64     - 64-bit X86: EM64T and AMD64\n</code></pre> <p>Tip - CPU features</p> <p>The <code>--iree-llvmcpu-target-cpu-features</code> flag tells the compiler to generate code using certain CPU \"features\", like SIMD instruction sets. Like the target triple, you can pass \"host\" to this flag to let LLVM infer the features supported by your host machine.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#run-a-compiled-program","title":"Run a compiled program","text":"<p>In the build directory, run the following command:</p> <pre><code>tools/iree-run-module \\\n    --device=local-task \\\n    --module=mobilenet_cpu.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["CPU"]},{"location":"guides/deployment-configurations/gpu-cuda/","title":"GPU deployment using CUDA","text":"<p>IREE can accelerate model execution on Nvidia GPUs using CUDA.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#prerequisites","title":"Prerequisites","text":"<p>In order to use CUDA to drive the GPU, you need to have a functional CUDA environment. 
It can be verified by the following steps:</p> <pre><code>nvidia-smi | grep CUDA\n</code></pre> <p>If <code>nvidia-smi</code> does not exist, you will need to install the latest CUDA Toolkit SDK.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core <code>iree-compiler</code> package includes the CUDA compiler:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install iree-compiler\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade iree-compiler\n</code></pre> <p>Tip</p> <p><code>iree-compile</code> is installed to your python module installation path. If you pip install with the user mode, it is under <code>${HOME}/.local/bin</code>, or <code>%APPDATA%Python</code> on Windows. You may want to include the path in your system's <code>PATH</code> environment variable:</p> <pre><code>export PATH=${HOME}/.local/bin:${PATH}\n</code></pre>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build the IREE compiler, then enable the CUDA compiler target with the <code>IREE_TARGET_BACKEND_CUDA</code> option.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. 
You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>Next you will need to get an IREE runtime that includes the CUDA HAL driver.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE from source, then enable the CUDA HAL driver with the <code>IREE_HAL_DRIVER_CUDA</code> option.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"<p>With the compiler and runtime ready, we can now compile programs and run them on GPUs.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run one of the following commands to compile:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=cuda \\\n    --iree-hal-cuda-llvm-target-arch=&lt;...&gt; \\\n    mobilenet_iree_input.mlir -o mobilenet_cuda.vmfb\n</code></pre> <p>Note that a cuda target architecture (<code>iree-hal-cuda-llvm-target-arch</code>) of the form <code>sm_&lt;arch_number&gt;</code> is needed to compile towards each GPU architecture. 
If no architecture is specified then we will default to <code>sm_35</code>.</p> <p>Here is a table of commonly used architectures:</p> CUDA GPU Target Architecture Nvidia K80 <code>sm_35</code> Nvidia P100 <code>sm_60</code> Nvidia V100 <code>sm_70</code> Nvidia A100 <code>sm_80</code>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#run-a-compiled-program","title":"Run a compiled program","text":"<p>Run the following command:</p> <pre><code>iree-run-module \\\n    --device=cuda \\\n    --module=mobilenet_cuda.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-metal/","title":"GPU deployment using Metal","text":"<p>Documentation coming soon!</p>","tags":["GPU","iOS"]},{"location":"guides/deployment-configurations/gpu-rocm/","title":"GPU deployment using ROCm","text":"<p>IREE can accelerate model execution on AMD GPUs using ROCm.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#prerequisites","title":"Prerequisites","text":"<p>In order to use ROCm to drive the GPU, you need to have a functional ROCm environment. 
It can be verified by the following steps:</p> <pre><code>rocm-smi | grep rocm\n</code></pre> <p>If <code>rocm-smi</code> does not exist, you will need to install the latest ROCm Toolkit SDK for Windows or Linux.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Currently ROCm is NOT supported for the Python interface.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build the IREE compiler, then enable the ROCm compiler target with the <code>IREE_TARGET_BACKEND_ROCM</code> option.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. 
You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>Next you will need to get an IREE runtime that includes the ROCm HAL driver.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE from source, then enable the experimental ROCm HAL driver with the <code>IREE_EXTERNAL_HAL_DRIVERS=rocm</code> option.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"<p>With the compiler and runtime ready, we can now compile programs and run them on GPUs.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run the following command to compile:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=rocm \\\n    --iree-rocm-target-chip=&lt;...&gt; \\\n    mobilenet_iree_input.mlir -o mobilenet_rocm.vmfb\n</code></pre> <p>Note that IREE comes with bundled bitcode files, which are used for linking certain intrinsics on AMD GPUs. These are used automatically when the <code>--iree-rocm-bc-dir</code> flag is empty. 
As additional support may be needed for different chips, users can use this flag to point to an explicit directory. For example, in ROCm installations on Linux, this is often found under <code>/opt/rocm/amdgcn/bitcode</code>.</p> <p>Note that a ROCm target chip (<code>iree-rocm-target-chip</code>) of the form <code>gfx&lt;arch_number&gt;</code> is needed to compile towards each GPU architecture. If no architecture is specified then we will default to <code>gfx908</code>.</p> <p>Here is a table of commonly used architectures:</p> AMD GPU Target Chip AMD MI25 <code>gfx900</code> AMD MI50 <code>gfx906</code> AMD MI60 <code>gfx906</code> AMD MI100 <code>gfx908</code> AMD MI300A <code>gfx940</code> AMD MI300 <code>gfx942</code>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#run-a-compiled-program","title":"Run a compiled program","text":"<p>Run the following command:</p> <pre><code>iree-run-module \\\n    --device=rocm \\\n    --module=mobilenet_rocm.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-vulkan/","title":"GPU deployment using Vulkan","text":"<p>IREE can accelerate model execution on GPUs via Vulkan, a low-overhead graphics and compute API. Vulkan is cross-platform: it is available on many operating systems, including Android, Linux, and Windows. 
Vulkan is also cross-vendor: it is supported by most GPU vendors, including AMD, ARM, Intel, NVIDIA, and Qualcomm.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#support-matrix","title":"Support matrix","text":"<p>As IREE and the compiler ecosystem it operates within mature, more target-specific optimizations will be implemented. At this stage, expect reasonable performance across all GPUs and for improvements to be made over time for specific vendors and architectures.</p> GPU Vendor Category Performance Focus Architecture ARM Mali GPU Mobile Good Valhall+ Qualcomm Adreno GPU Mobile Reasonable 640+ AMD GPU Desktop/server Good RDNA+ NVIDIA GPU Desktop/server Good Turing+","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#prerequisites","title":"Prerequisites","text":"<p>In order to use Vulkan to drive the GPU, you need to have a functional Vulkan environment. IREE requires Vulkan 1.1 on Android and 1.2 elsewhere. It can be verified by the following steps:</p> Android Linux Windows <p>Android has mandated Vulkan 1.1 support since Android 10. You just need to make sure the device's Android version is 10 or higher.</p> <p>Run the following command in a shell:</p> <pre><code>vulkaninfo | grep apiVersion\n</code></pre> <p>If <code>vulkaninfo</code> does not exist, you will need to install the latest Vulkan SDK. 
Installing via LunarG's package repository is recommended, as it places Vulkan libraries and tools under system paths so it's easy to discover.</p> <p>If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.</p> <p>Run the following command in a shell:</p> <pre><code>vulkaninfo | grep apiVersion\n</code></pre> <p>If <code>vulkaninfo</code> does not exist, you will need to install the latest Vulkan SDK.</p> <p>If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-compiler","title":"Get the IREE compiler","text":"<p>Vulkan expects the program running on GPU to be expressed by the SPIR-V binary exchange format, which the model must be compiled into.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"<p>Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core <code>iree-compiler</code> package includes the SPIR-V compiler:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install iree-compiler\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade iree-compiler\n</code></pre> <p>Tip</p> <p><code>iree-compile</code> is installed to your python module installation path. If you pip install with the user mode, it is under <code>${HOME}/.local/bin</code>, or <code>%APPDATA%Python</code> on Windows. 
You may want to include the path in your system's <code>PATH</code> environment variable:</p> <pre><code>export PATH=${HOME}/.local/bin:${PATH}\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-compiler-from-source","title":"Build the compiler from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. The SPIR-V compiler backend is compiled in by default on all platforms.</p> <p>Ensure that the <code>IREE_TARGET_BACKEND_VULKAN_SPIRV</code> CMake option is <code>ON</code> when configuring for the host.</p> <p>Tip</p> <p><code>iree-compile</code> will be built under the <code>iree-build/tools/</code> directory. You may want to include this path in your system's <code>PATH</code> environment variable.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-runtime","title":"Get the IREE runtime","text":"<p>Next you will need to get an IREE runtime that supports the Vulkan HAL driver.</p> <p>You can check for Vulkan support by looking for a matching driver and device:</p> <pre><code>$ iree-run-module --list_drivers\n\n        cuda: CUDA (dynamic)\n  local-sync: Local execution using a lightweight inline synchronous queue\n  local-task: Local execution using the IREE multithreading task system\n      vulkan: Vulkan 1.x (dynamic)\n</code></pre> <pre><code>$ iree-run-module --list_devices\n\n  cuda://GPU-00000000-1111-2222-3333-444444444444\n  local-sync://\n  local-task://\n  vulkan://00000000-1111-2222-3333-444444444444\n</code></pre>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-runtime-from-source","title":"Build the runtime from source","text":"<p>Please make sure you have followed the Getting started page to build IREE for Linux/Windows and the Android cross-compilation page for 
Android. The Vulkan HAL driver is compiled in by default on non-Apple platforms.</p> <p>Ensure that the <code>IREE_HAL_DRIVER_VULKAN</code> CMake option is <code>ON</code> when configuring for the target.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-and-run-a-program","title":"Compile and run a program","text":"<p>With the SPIR-V compiler and Vulkan runtime, we can now compile programs and run them on GPUs.</p>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-a-program","title":"Compile a program","text":"<p>The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.</p> <p>Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. 
Then run the following command to compile with the <code>vulkan-spirv</code> target:</p> <pre><code>iree-compile \\\n    --iree-hal-target-backends=vulkan-spirv \\\n    --iree-vulkan-target-triple=&lt;...&gt; \\\n    mobilenet_iree_input.mlir -o mobilenet_vulkan.vmfb\n</code></pre> <p>Note</p> <p>Currently a target triple of the form <code>&lt;vendor/arch&gt;-&lt;product&gt;-&lt;os&gt;</code> is needed to compile towards a specific GPU architecture.</p> <p>We don't support the full spectrum here(1); the following table summarizes the currently recognized ones.</p> <p>If no triple is specified, then a safe but more limited default will be used.</p> <p>This is more of a mechanism to help us develop IREE itself--in the long term we want to perform multi-targeting to generate code for multiple architectures if no target triple is given.</p> <ol> <li>It's also impossible to capture all details of a Vulkan implementation    with a target triple, given the allowed variances on extensions, properties,    limits, etc. So the target triple is just an approximation for usage.</li> </ol> GPU Vendor Target Triple ARM Mali GPU e.g. <code>valhall-unknown-{android30|android31}</code> Qualcomm Adreno GPU e.g. <code>adreno-unknown-{android30|android31}</code> AMD GPU e.g. <code>{rdna1|rdna2|rdna3}-unknown-unknown</code> NVIDIA GPU e.g. <code>{turing|ampere}-unknown-unknown</code> SwiftShader CPU <code>cpu-swiftshader-unknown</code>","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#run-a-compiled-program","title":"Run a compiled program","text":"<p>In the build directory, run the following command:</p> <pre><code>tools/iree-run-module \\\n    --device=vulkan \\\n    --module=mobilenet_vulkan.vmfb \\\n    --function=predict \\\n    --input=\"1x224x224x3xf32=0\"\n</code></pre> <p>The above assumes the exported function in the model is named as <code>predict</code> and it expects one 224x224 RGB image. 
We are feeding in an image with all 0 values here for brevity, see <code>iree-run-module --help</code> for the format to specify concrete values.</p>","tags":["GPU","Vulkan"]},{"location":"guides/ml-frameworks/","title":"ML frameworks","text":"<p>IREE supports popular machine learning frameworks using the same underlying technology.</p> <pre><code>graph LR\n  accTitle: ML framework to runtime deployment workflow overview\n  accDescr {\n    Programs start in some ML framework.\n    Programs are imported into MLIR.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  A[ML frameworks]\n  B[Imported MLIR]\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>"},{"location":"guides/ml-frameworks/#supported-frameworks","title":"Supported frameworks","text":"<p>See guides on how to use each framework with IREE:</p> <ul> <li> JAX</li> <li> ONNX</li> <li> PyTorch</li> <li> TensorFlow and    TensorFlow Lite</li> </ul>"},{"location":"guides/ml-frameworks/#samples","title":"Samples","text":"<p>Check out the samples in IREE's <code>samples/</code> directory, as well as the iree-experimental repository.</p>"},{"location":"guides/ml-frameworks/#exportimport","title":"Export/Import","text":"<p>Each machine learning framework has some \"export\" mechanism that snapshots the structure and data in your program. These exported programs can then be \"imported\" into IREE's compiler by using either a stable import format or one of IREE's importer tools.</p> <p>This export/import process is specific to each frontend and typically involves a number of stages:</p> <ol> <li>Capture/trace/freeze the ML model into a graph</li> <li>Write that graph to an interchange format (e.g. 
SavedModel, TorchScript,    ONNX)</li> <li>Load the saved program into an import tool and convert to MLIR</li> <li>Legalize the graph's operations so only IREE-compatible operations remain</li> <li>Write the imported MLIR to a file</li> </ol> <p>This fully imported form can then be compiled independently of the source language and framework.</p>"},{"location":"guides/ml-frameworks/#compilation","title":"Compilation","text":"<p>IREE compiles MLIR files for specified sets of backends (CPU, GPU, etc). Each backend generates optimized native code custom to the input program and intended target platform. Once compiled, modules can be executed using IREE's runtime.</p> <p>See the deployment configuration guides for details on selecting a compiler backend and tuning options for your choice of target platform(s) or device(s).</p>"},{"location":"guides/ml-frameworks/#execution","title":"Execution","text":"<p>Compiled modules can be executed by selecting what compute devices to use, loading the module, and then executing it with the intended inputs. IREE provides several language bindings for its runtime API.</p>"},{"location":"guides/ml-frameworks/jax/","title":"JAX integration","text":"<p>Note</p> <p>IREE's JAX support is under development. This page is still under construction.</p>","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/jax/#overview","title":"Overview","text":"<p>IREE offers two ways to interface with JAX programs:</p> <ul> <li>An API for extracting and compiling full models ahead of time (AOT) for   execution apart from JAX. This API is being developed in the   iree-org/iree-jax repository.</li> <li>A PJRT plugin that adapts IREE as a native JAX backend for online / just in   time (JIT) use. 
This plugin is being developed in the   <code>integrations/pjrt</code> directory.</li> </ul>","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/onnx/","title":"ONNX support","text":"<p>Caution - under development</p> <p>Support for a broad set of ONNX operators and data types is an active investment area. See the ONNX Op Support tracking issue for the latest status.</p>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#overview","title":"Overview","text":"<p>Machine learning models using the Open Neural Network Exchange (ONNX) format can be deployed using the IREE compiler and runtime:</p> <pre><code>graph LR\n  accTitle: ONNX to runtime deployment workflow overview\n  accDescr {\n    Programs start as ONNX protobufs.\n    Programs are imported into MLIR using iree-import-onnx.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  A[\"ONNX\\n(protobuf)\"]\n  B[\"MLIR\\n(torch-mlir)\"]\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A -- iree-import-onnx --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#prerequisites","title":"Prerequisites","text":"<ol> <li> <p>Install ONNX:</p> <pre><code>python -m pip install onnx\n</code></pre> </li> <li> <p>Install IREE packages, either by     building from source     or from pip:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler[onnx] \\\n  iree-runtime\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler[onnx] \\\n  iree-runtime\n</code></pre> </li> </ol>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#quickstart","title":"Quickstart","text":"<ol> <li> <p>Start with a <code>.onnx</code> protobuf 
file, such as a model from    https://github.com/onnx/models.</p> </li> <li> <p>Convert the <code>.onnx</code> file into MLIR using the <code>iree-import-onnx</code> tool:</p> <pre><code>iree-import-onnx [model.onnx] -o [model.mlir]\n</code></pre> <p>This tool produces a MLIR file with the help of the torch-mlir project.</p> </li> <li> <p>Once imported, the standard set of tools and APIs available for any of    IREE's deployment configurations and    API bindings can be used:</p> <pre><code>iree-compile \\\n  model.mlir \\\n  --iree-hal-target-backends=llvm-cpu \\\n  -o model_cpu.vmfb\n\niree-run-module \\\n  model_cpu.vmfb \\\n  --device=local-task \\\n  --entry_function=... \\\n  --input=... \\\n  ...\n</code></pre> </li> </ol>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#samples","title":"Samples","text":"Code samples Curated op and model tests SHARK-TestSuite <code>e2eshark/onnx</code> Generated op tests SHARK-TestSuite <code>iree_tests/onnx</code> Importer tests torch-mlir <code>test/python/onnx_importer</code>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#troubleshooting","title":"Troubleshooting","text":"","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/onnx/#failed-to-legalize-operation-that-was-explicitly-marked-illegal","title":"Failed to legalize operation that was explicitly marked illegal","text":"<p>If you see an error compiling a converted .mlir file like this:</p> <pre><code>$ iree-compile model.mlir --iree-hal-target-backends=llvm-cpu -o model.vmfb\n\nmodel.mlir:507:12: error: failed to legalize operation 'torch.operator' that was explicitly marked illegal\n    %503 = torch.operator \"onnx.Identity\"(%arg0) : (!torch.vtensor&lt;[?],si64&gt;) -&gt; !torch.vtensor&lt;[?],si64&gt;\n           ^\n</code></pre> <p>There are several possible scenarios:</p> <ol> <li>The operator is not implemented, or the implementation is missing a case.    
Search for a matching issue in one of these places:<ul> <li>https://github.com/llvm/torch-mlir/issues</li> <li>https://github.com/nod-ai/SHARK-Turbine/issues</li> </ul> </li> <li> <p>The operator is implemented but only for a more recent ONNX version. You can    try upgrading your .onnx file using the    ONNX Version Converter:</p> convert_onnx_model.py<pre><code>import onnx\noriginal_model = onnx.load_model(\"model.onnx\")\nconverted_model = onnx.version_converter.convert_version(original_model, 17)\nonnx.save(converted_model, \"model_17.onnx\")\n</code></pre> <p>and then attempting the convert -&gt; compile again:</p> <pre><code>iree-import-onnx model_17.onnx -o model_17.mlir\niree-compile model_17.mlir ...\n</code></pre> </li> </ol>","tags":["ONNX","Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/","title":"PyTorch + IREE =","text":"<p>Caution - under development</p> <p>We are still validating and fixing specific models. Between bug fixes in flight and releases running behind, we don't expect that you will be able to do a lot of advanced things without using nightly releases or working with us.</p> <p>Stay tuned and join the discussion in our Discord server's <code>#pytorch</code> channel.</p>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#overview","title":"Overview","text":"<p>iree-turbine (rebrand pending from SHARK-Turbine) offers a tight integration between compatible versions of IREE, torch-mlir, and PyTorch.</p> <ul> <li> Seamless integration with standard PyTorch workflows</li> <li> Deployment support for running PyTorch models on cloud and edge devices</li> <li> General purpose model compilation and execution tools</li> </ul> <p>Both just-in-time (JIT) and ahead-of-time (AOT) workflows are supported:</p> <pre><code>graph LR\n  accTitle: PyTorch integration overview\n  accDescr {\n    PyTorch programs can be optimized within a Python session with\n    iree-turbine's just-in-time tools.\n    PyTorch programs can be 
exported out of Python to native binaries using\n    iree-turbine's ahead-of-time export toolkit.\n  }\n\n  subgraph Python\n    pytorch(PyTorch)\n    subgraph turbine [iree-turbine]\n      jit(\"Eager execution (JIT)\")\n      aot(\"Export toolkit (AOT)\")\n    end\n\n    pytorch --&gt; jit\n    jit --&gt; pytorch\n    pytorch --&gt; aot\n  end\n\n  subgraph Native\n    binary([\"binary (.vmfb)\"])\n  end\n\n  aot -.-&gt; binary</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#prerequisites","title":"Prerequisites","text":"<p>Install a recent version of PyTorch (<code>2.3.0+</code>, prerelease as of April 2024):</p> <pre><code>python -m pip install \\\n  --pre --index-url https://download.pytorch.org/whl/test/cpu torch==2.3.0\n</code></pre> <p>Install iree-turbine:</p> <pre><code>python -m pip install iree-turbine\n</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#just-in-time-jit-execution","title":"Just-in-time (JIT) execution","text":"<p>Just-in-time integration allows for Python code using TorchDynamo to optimize PyTorch models/functions using IREE, all within an interactive Python session.</p> <pre><code>graph TD\n  accTitle: PyTorch JIT workflow overview\n  accDescr {\n    Programs start as either PyTorch nn.Module objects or callable functions.\n    Programs are compiled into optimized modules using torch.compile.\n    Within torch.compile, Dynamo runs the program through Turbine and IREE.\n  }\n\n  subgraph Python\n    input([nn.Module / function])\n\n    subgraph compile [\"torch.compile()\"]\n      direction LR\n      dynamo{{TorchDynamo}}\n      turbine{{iree-turbine}}\n      iree{{IREE}}\n      dynamo --&gt; turbine --&gt; iree\n    end\n\n    output([Optimized module])\n    input --&gt; compile --&gt; output\n  end</code></pre> <p>For deployment outside of Python, see the ahead-of-time sections 
below.</p>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#quickstart","title":"Quickstart","text":"<p>Turbine integrates into PyTorch as a custom backend for <code>torch.compile</code>.</p> <p>Behind the scenes, PyTorch captures the structure of the input model into a computation graph and feeds that graph through to the selected backend compiler.</p> <pre><code>import torch\n\n# Define the `nn.Module` or Python function to run.\nclass LinearModule(torch.nn.Module):\n  def __init__(self, in_features, out_features):\n    super().__init__()\n    self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n    self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n  def forward(self, input):\n    return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Compile the program using the turbine backend.(1)\nopt_linear_module = torch.compile(linear_module, backend=\"turbine_cpu\")\n\n# Use the compiled program as you would the original program.\nargs = torch.randn(4)\nturbine_output = opt_linear_module(args)\n</code></pre> <ol> <li>Initial integration only supports CPU, but support for many of IREE's other    targets is coming soon.</li> </ol>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples","title":"Samples","text":"Code samples JIT compilation notebook Simple MLP eager <code>core/examples/eager_mlp/mlp_eager_simple.py</code>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#ahead-of-time-aot-export","title":"Ahead-of-time (AOT) export","text":"<p>The ahead-of-time toolkit allows developers to define a program's structure in Python and then export deployment-ready artifacts that can be used in IREE's deployment configurations via the API bindings.</p>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#simple-api","title":"Simple API","text":"<p>For simple models, a one-shot export API is available.</p> <pre><code>graph LR\n  
accTitle: PyTorch simple AOT workflow overview\n  accDescr {\n    Programs start as PyTorch nn.Module objects.\n    Modules are exported using the \"aot\" API.\n    Exported outputs are then compiled to .vmfb files with executable binaries.\n  }\n\n  subgraph Python\n    input([nn.Module])\n    export([\"ExportOutput (MLIR)\"])\n    input -- \"aot.export()\" --&gt; export\n  end\n\n  subgraph Native\n    binary([\"binary (.vmfb)\"])\n  end\n\n  export -. \"compile()\" .-&gt; binary</code></pre> <pre><code>import iree.runtime as ireert\nimport numpy as np\nimport shark_turbine.aot as aot\nimport torch\n\n# Define the `nn.Module` to export.\nclass LinearModule(torch.nn.Module):\n  def __init__(self, in_features, out_features):\n    super().__init__()\n    self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n    self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n  def forward(self, input):\n    return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Export the program using the simple API.\nexample_arg = torch.randn(4)\nexport_output = aot.export(linear_module, example_arg)\n\n# Compile to a deployable artifact.\nbinary = export_output.compile(save_to=None)\n\n# Use the IREE runtime API to test the compiled program.\nconfig = ireert.Config(\"local-task\")\nvm_module = ireert.load_vm_module(\n    ireert.VmModule.wrap_buffer(config.vm_instance, binary.map_memory()),\n    config,\n)\ninput = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)\nresult = vm_module.main(input)\nprint(result.to_host())\n</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_1","title":"Samples","text":"Code samples Simple AOT export notebook Import Whisper from  Hugging Face notebook Simple MLP export <code>core/examples/aot_mlp/mlp_export_simple.py</code>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#advanced-api","title":"Advanced API","text":"<p>For more complex 
models, an underlying advanced API is available that gives access to more features.</p> <pre><code>graph LR\n  accTitle: PyTorch advanced AOT workflow overview\n  accDescr {\n    Programs are represented using the aot.CompiledModule class.\n    CompiledModules can extend nn.Module objects, export globals, and set\n    shapes and dtypes for each function.\n    Modules are exported using the \"aot\" API.\n    Exported outputs are then compiled to .vmfb files with executable binaries.\n  }\n\n  subgraph Python\n    compiledmodule(\"aot.CompiledModule\\n\\n- extend nn.Module\\n- export globals\\n- set shapes/dtypes\")\n    export([\"ExportOutput (MLIR)\"])\n    compiledmodule -- \"aot.export()\" --&gt; export\n  end\n\n  subgraph Native\n    binary([\"binary (.vmfb)\"])\n  end\n\n  export -. \"compile()\" .-&gt; binary</code></pre> <p>Advanced export workflows can use the <code>aot.CompiledModule</code> class to define and constrain the structure of a program prior to compiling it.</p> <pre><code>import shark_turbine.aot as aot\n\n# A minimal program, with no functions or variables.\nclass BasicModule(aot.CompiledModule):\n  ...\n\n# Create an instance of the program and convert it to MLIR.\nfrom iree.compiler.ir import Context\ninstance = BasicModule(context=Context())\nmodule_str = str(aot.CompiledModule.get_mlir_module(instance))\n\nprint(module_str)\n# module @basic {\n# }\n</code></pre>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#exporting-functions","title":"Exporting functions","text":"<p>Exported functions are the API entry points into a compiled program.</p> <p>Simple feed-forward neural networks used for inference may have a single exported function (typically called \"forward\"), while more complex programs can have multiple computation functions, initialization functions, \"backward\" methods for training, state management functions, debugging functions, etc.</p> <ul> <li> <p>Each instance method on a 
<code>aot.CompiledModule</code>-derived class is exported.   These instance methods can include calls to other <code>aot</code> components, such as   <code>aot.jittable</code> compute functions:</p> <pre><code>class GetOnesModule(aot.CompiledModule):\n  @aot.jittable\n  def compute_ones():\n    return torch.ones(3)\n\n  def get_ones(self):\n    return self.compute_ones()\n</code></pre> </li> <li> <p>Instance methods can use <code>aot.AbstractTensor</code> to specify data types:</p> <pre><code>class IntSumModule(aot.CompiledModule):\n  @aot.jittable\n  def compute_sum(a, b):\n    return a + b\n\n  def sum_int32(\n    self,\n    a=aot.AbstractTensor(2, dtype=torch.int32),\n    b=aot.AbstractTensor(2, dtype=torch.int32),\n  ):\n    return self.compute_sum(a, b)\n</code></pre> </li> <li> <p>Shapes can be made dynamic using <code>aot.AbstractTensor</code> and <code>aot.jittable</code>   constraints:</p> <pre><code>class DynamicSumModule(aot.CompiledModule):\n  @aot.jittable\n  def compute_sum(a, b):\n    return a + b\n\n  def sum_dynamic(\n    self,\n    a=aot.AbstractTensor(None),\n    b=aot.AbstractTensor(None),\n  ):\n    return self.compute_sum(\n        a,\n        b,\n        constraints=[\n            a.dynamic_dim(0) == b.dynamic_dim(0),\n        ],\n    )\n</code></pre> </li> </ul>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#global-variables","title":"Global variables","text":"<p>Global variables are used to represent persistent state within a program instance.</p> <p>For example, they can be used to represent the weights and biases in a neural network, and exporting these as mutable variables can allow for setting their values independently at runtime.</p> <ul> <li> <p>Individual globals can be exported using <code>aot.export_global()</code>:</p> <pre><code>state_example = torch.zeros([1], dtype=torch.int32)\n\nclass SampleModule(aot.CompiledModule):\n  value = aot.export_global(state_example, mutable=True)\n\n  def 
get_value(self):\n    return self.value\n\n  def update_value(self, new_value=aot.abstractify(value)):\n    self.value = new_value\n</code></pre> </li> <li> <p>All named parameters on a <code>nn.Module</code> can be exported using   <code>export_parameters()</code>:</p> <pre><code>class SimpleParams(torch.nn.Module):\n  def __init__(self):\n    super().__init__()\n    self.classifier = torch.nn.Linear(20, 30)\n\n  def forward(self, x):\n    return self.classifier(x)\n\nm = SimpleParams()\n\nclass SimpleParamsModule(aot.CompiledModule):\n  params = aot.export_parameters(m)\n  compute = aot.jittable(m.forward)\n\n  def run(self, x=aot.AbstractTensor(128, 20)):\n    return self.compute(x)\n\n  # torch.nn.Linear has 'weight' and 'bias' variables:\n  #   https://pytorch.org/docs/stable/generated/torch.nn.Linear.html\n  # Add getters for both exported parameters.\n\n  def get_weight(self):\n    return self.params[\"classifier.weight\"]\n\n  def get_bias(self):\n    return self.params[\"classifier.bias\"]\n</code></pre> </li> </ul>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_2","title":"Samples","text":"Code samples Advanced AOT export notebook PyTorch dynamic shapes notebook AOT unit tests <code>core/tests/aot/</code> Dynamic MLP export <code>core/examples/aot_mlp/mlp_export_dynamic.py</code> stateless llama2 <code>models/turbine_models/custom_models/stateless_llama.py</code>","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/tensorflow/","title":"TensorFlow integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#overview","title":"Overview","text":"<p>IREE supports compiling and running TensorFlow programs represented as <code>tf.Module</code> classes or stored in the <code>SavedModel</code> format.</p> <pre><code>graph LR\n  accTitle: TensorFlow to runtime deployment workflow overview\n  accDescr {\n    Programs start as either TensorFlow SavedModel or tf.Module programs.\n   
 Programs are imported into MLIR as StableHLO.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  subgraph A[TensorFlow]\n    direction TB\n    A1[SavedModel]\n    A2[tf.Module]\n\n    A1 --- A2\n  end\n\n  subgraph B[MLIR]\n    B1[StableHLO]\n  end\n\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A -- iree-import-tf --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#prerequisites","title":"Prerequisites","text":"<ol> <li> <p>Install TensorFlow by following the     official documentation:</p> <pre><code>python -m pip install tf-nightly\n</code></pre> </li> <li> <p>Install IREE packages, either by     building from source     or from pip:</p> Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tf\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tf\n</code></pre> </li> </ol>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#importing-models","title":"Importing models","text":"<p>IREE compilers transform a model into its final deployable format in several sequential steps. 
The first step for a TensorFlow model is to use either the <code>iree-import-tf</code> command-line tool or IREE's Python APIs to import the model into a format (i.e., MLIR) compatible with the generic IREE compilers.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#from-savedmodel-on-tensorflow-hub","title":"From SavedModel on TensorFlow Hub","text":"<p>IREE supports importing and using SavedModels from TensorFlow Hub.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#using-the-command-line-tool","title":"Using the command-line tool","text":"<p>First download the SavedModel and load it to get the serving signature, which is used as the entry point for IREE compilation flow:</p> <pre><code>import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\nprint(list(loaded_model.signatures.keys()))\n</code></pre> <p>Note</p> <p>If there are no serving signatures in the original SavedModel, you may add them by yourself by following \"Missing serving signature in SavedModel\".</p> <p>Then you can import the model with <code>iree-import-tf</code>. You can read the options supported via <code>iree-import-tf -help</code>. Using MobileNet v2 as an example and assuming the serving signature is <code>predict</code>:</p> <pre><code>iree-import-tf \\\n  --tf-import-type=savedmodel_v1 \\\n  --tf-savedmodel-exported-names=predict \\\n  /path/to/savedmodel -o iree_input.mlir\n</code></pre> <p>Tip</p> <p><code>iree-import-tf</code> is installed as <code>/path/to/python/site-packages/iree/tools/tf/iree-import-tf</code>. You can find out the full path to the <code>site-packages</code> directory via the <code>python -m site</code> command.</p> <p>Tip</p> <p><code>--tf-import-type</code> needs to match the SavedModel version. 
You can try both v1 and v2 if you see one of them gives an empty dump.</p> <p>Next, you can compile the model in <code>iree_input.mlir</code> for one of IREE's supported targets by following one of the deployment configuration guides.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#samples","title":"Samples","text":"Colab notebooks Training an MNIST digits classifier Edge detection Pretrained ResNet50 inference TensorFlow Hub import <p>End-to-end execution tests can be found in IREE's integrations/tensorflow/e2e/ directory.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#troubleshooting","title":"Troubleshooting","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#missing-serving-signature-in-savedmodel","title":"Missing serving signature in SavedModel","text":"<p>Sometimes SavedModels are exported without explicit serving signatures. This happens by default for TensorFlow Hub SavedModels. However, serving signatures are required as entry points for IREE compilation flow. You can use Python to load and re-export the SavedModel to give it serving signatures. 
For example, for MobileNet v2, assuming we want the serving signature to be <code>predict</code> and operating on a 224x224 RGB image:</p> <pre><code>import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\ncall = loaded_model.__call__.get_concrete_function(\n         tf.TensorSpec([1, 224, 224, 3], tf.float32))\nsignatures = {'predict': call}\ntf.saved_model.save(loaded_model,\n  '/path/to/resaved/model/', signatures=signatures)\n</code></pre> <p>The above will create a new SavedModel with a serving signature, <code>predict</code>, and save it to <code>/path/to/resaved/model/</code>.</p>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/","title":"TensorFlow Lite integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#overview","title":"Overview","text":"<p>IREE supports compiling and running TensorFlow Lite (TFLite) programs stored as TFLite FlatBuffers. These files can be imported into an IREE-compatible format then compiled to a series of backends.</p> <pre><code>graph LR\n  accTitle: TFLite to runtime deployment workflow overview\n  accDescr {\n    Programs start as TensorFlow Lite FlatBuffers.\n    Programs are imported into MLIR's TOSA dialect using iree-import-tflite.\n    The IREE compiler uses the imported MLIR.\n    Compiled programs are used by the runtime.\n  }\n\n  subgraph A[TFLite]\n    A1[FlatBuffer]\n  end\n\n  subgraph B[MLIR]\n    B1[TOSA]\n  end\n\n  C[IREE compiler]\n  D[Runtime deployment]\n\n  A -- iree-import-tflite --&gt; B\n  B --&gt; C\n  C --&gt; D</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#prerequisites","title":"Prerequisites","text":"<ol> <li> <p>Install TensorFlow by following the     official documentation:</p> <pre><code>python -m pip install tf-nightly\n</code></pre> </li> <li> <p>Install IREE packages, either by     building from source     or from pip:</p> Stable releases 
Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tflite\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler \\\n  iree-runtime \\\n  iree-tools-tflite\n</code></pre> </li> </ol>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#importing-and-compiling","title":"Importing and Compiling","text":"<p>IREE's tooling is divided into two components: import and compilation.</p> <ol> <li>The import tool converts the TFLite FlatBuffer to an IREE compatible form,   validating that only IREE compatible operations remain. Containing a combination   of TOSA and IREE operations.</li> <li>The compilation stage generates the bytecode module for a list of targets,   which can be executed by IREE.</li> </ol>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-command-line-tools","title":"Using Command Line Tools","text":"<p>These two stages can be completed entirely via the command line.</p> <pre><code>WORKDIR=\"/tmp/workdir\"\nTFLITE_URL=\"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\nTFLITE_PATH=${WORKDIR}/model.tflite\nIMPORT_PATH=${WORKDIR}/tosa.mlir\nMODULE_PATH=${WORKDIR}/module.vmfb\n\n# Fetch the sample model\nwget ${TFLITE_URL} -O ${TFLITE_PATH}\n\n# Import the sample model to an IREE compatible form\niree-import-tflite ${TFLITE_PATH} -o ${IMPORT_PATH}\n\n# Compile for the CPU backend\niree-compile \\\n    --iree-input-type=tosa \\\n    --iree-hal-target-backends=llvm-cpu \\\n    ${IMPORT_PATH} \\\n    -o ${MODULE_PATH}\n</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-the-python-api","title":"Using the Python API","text":"<p>The example below demonstrates downloading, 
compiling, and executing a TFLite model using the Python API. This includes some initial setup to declare global variables, download the sample module, and download the sample inputs.</p> <p>Declaration of absolute paths for the sample repo and import all required libraries. The default setup uses the CPU backend as the only target. This can be reconfigured to select alternative targets.</p> <pre><code>import iree.compiler.tflite as iree_tflite_compile\nimport iree.runtime as iree_rt\nimport numpy\nimport os\nimport urllib.request\n\nfrom PIL import Image\n\nworkdir = \"/tmp/workdir\"\nos.makedirs(workdir, exist_ok=True)\n\ntfliteFile = \"/\".join([workdir, \"model.tflite\"])\njpgFile = \"/\".join([workdir, \"input.jpg\"])\ntfliteIR = \"/\".join([workdir, \"tflite.mlir\"])\ntosaIR = \"/\".join([workdir, \"tosa.mlir\"])\nbytecodeModule = \"/\".join([workdir, \"iree.vmfb\"])\n\nbackends = [\"llvm-cpu\"]\nconfig = \"local-task\"\n</code></pre> <p>The TFLite sample model and input are downloaded locally.</p> <pre><code>tfliteUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\njpgUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8_input.jpg\"\n\nurllib.request.urlretrieve(tfliteUrl, tfliteFile)\nurllib.request.urlretrieve(jpgUrl, jpgFile)\n</code></pre> <p>Once downloaded we can compile the model for the selected backends. Both the TFLite and TOSA representations of the model are saved for debugging purposes. 
This is optional and can be omitted.</p> <pre><code>iree_tflite_compile.compile_file(\n  tfliteFile,\n  input_type=\"tosa\",\n  output_file=bytecodeModule,\n  save_temp_tfl_input=tfliteIR,\n  save_temp_iree_input=tosaIR,\n  target_backends=backends,\n  import_only=False)\n</code></pre> <p>After compilation is completed we configure the VmModule using the local-task configuration and compiled IREE module.</p> <pre><code>config = iree_rt.Config(\"local-task\")\ncontext = iree_rt.SystemContext(config=config)\nwith open(bytecodeModule, 'rb') as f:\n  vm_module = iree_rt.VmModule.from_flatbuffer(config.vm_instance, f.read())\n  context.add_vm_module(vm_module)\n</code></pre> <p>Finally, the IREE module is loaded and ready for execution. Here we load the sample image, manipulate to the expected input size, and execute the module. By default TFLite models include a single function named 'main'. The final results are printed.</p> <pre><code>im = numpy.array(Image.open(jpgFile).resize((192, 192))).reshape((1, 192, 192, 3))\nargs = [im]\n\ninvoke = context.modules.module[\"main\"]\niree_results = invoke(*args)\nprint(iree_results)\n</code></pre>","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#samples","title":"Samples","text":"<ul> <li> <p>The tflitehub folder in the iree-experimental repository contains test scripts to compile, run, and compare various TensorFlow Lite models sourced from TensorFlow Hub.</p> </li> <li> <p>An example smoke test of the TensorFlow Lite C API is available here.</p> </li> </ul> Colab notebooks Text classification with TFLite and IREE","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#troubleshooting","title":"Troubleshooting","text":"<p>Failures during the import step usually indicate a failure to lower from TensorFlow Lite's operations to TOSA, the intermediate representation used by IREE. Many TensorFlow Lite operations are not fully supported, particularly those that use dynamic shapes. 
Please reach out on one of IREE's communication channels if you notice something missing.</p>","tags":["Python","TensorFlow"]},{"location":"reference/","title":"Reference pages","text":""},{"location":"reference/#api-bindings","title":"API bindings","text":"<p>IREE offers API bindings for compiling and running programs from various languages.</p> <ul> <li>Index page</li> </ul>"},{"location":"reference/#mlir-dialects","title":"MLIR dialects","text":"<p>Automatically generated documentation for the MLIR dialects defined in the IREE repository.</p> <ul> <li>Index page</li> </ul>"},{"location":"reference/#other-topics","title":"Other topics","text":"<ul> <li>Glossary</li> <li>Optimization options</li> <li>Extensions</li> </ul>"},{"location":"reference/extensions/","title":"Extension mechanisms","text":"<p>Note</p> <p>Much of this describes provisions for extension within IREE but until the core of the system has settled little work will be done to fully flesh-out and document them in detail. A large majority of things that would make someone want to extend IREE can instead be accomplished much easier and performantly using native MLIR dialects that are then processed by the IREE compiler.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#guidelines","title":"Guidelines","text":"<p>IREE has a compiler and runtime separation, a multi-layered architecture, and split between execution of \"host code\" that schedules compute-heavy work and SPMD \"device code\" that performs the bulk of compute operations. Each axis has a different set of extension mechanisms that can be used independently or combined.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#extension-philosophy","title":"Extension philosophy","text":"<p>Organized below are some of the mechanisms IREE provides for extending the core compiler and runtime and when they should(n't) be used. 
The goal of these progressively lower-level extension mechanisms is to make it easier for users to fall into the pit of success:</p> <p>Quote</p> <p>\"a well-designed system makes it easy to do the right things and annoying (but not impossible) to do the wrong things.\" - Jeff Atwood</p> <p>The amount of engineering complexity for initial bring-up and maintenance increases with each subsequently lower-level approach and it is best to start from the top and exit as fast as possible: this is a choose-your-own-adventure where you're trying to escape the dungeon with both the loot and your limbs . Avoid the temptation of immediately dropping down to making external C calls at runtime because that's how it's been done before as it's easier, more robust, and more performant to use the system as it is intended to be used.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-extend","title":"When to extend","text":"<p>The primary goal when extending any framework should first be to avoid extending it at all. There is no mechanism that is free - whether in terms of engineering effort to develop and maintain over time, include in compiler deployments, or include in runtime deployments. As a system scales in deployment configurations the available mechanisms for extension increase but so too does the chaos introduced by extensions that do not also scale with that design. 
Users are the only ones who can determine the tradeoffs they are willing to accept: for example, the mechanism to extend device code with a custom runtime call to a C function does not work on GPUs and gets significantly more complicated on CPUs as sandboxes/enclaves are used - but if the user scenario is for local process CPU-only execution that may not matter.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-inputscompilerruntime","title":"Where to extend (inputs/compiler/runtime)","text":"<p>Consider in normal software development when one would choose to write more code (possibly packaging it into a reusable library) vs. changing the programming language or compiler they are using to compile their code vs. changing the operating systems their code runs on. The further one gets from the problem they are trying to solve the more work, coordination, and maintenance is involved and though there are reasons to make changes across the stack they should be done only when a simpler solution would not suffice.</p> <p>An author will retain more control over their logic the closer they sit to the inputs to the compiler. IREE provides several mechanisms that try to keep control with the author and robust to changes in IREE or MLIR internals and it is strongly encouraged that those looking to extend take those routes first. Contributions that help everyone are very welcome but do have a higher cost and it's often much easier to design and justify upstream changes with working examples in forks or at higher levels of the stack.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-hostdevice","title":"Where to extend (host/device)","text":"<p>From a performance perspective the rule is to colocate code with the data it is acting on: tensor data, for example, should almost exclusively be manipulated by device code as tensors live on device. 
Attempting to use tensor data with host code will result in synchronization points and host/device transfers that can decimate performance. This can lead to seemingly paradoxical situations where swapping out compiler-generated code for a human-authored \"fast path\" can be slower than even the most naive compiler results. An important thing to keep in mind with compilers is that it is exceedingly difficult to produce code by hand that is consistently more performant across a broad range of deployments and the first temptation should always be to improve the compiler - extending it via other mechanisms when not required by the task is often just premature optimization.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#1-target-iree-input-dialects","title":"1. Target IREE input dialects","text":"<p>TL;DR</p> <p>Convert your custom ops into standard MLIR dialects.</p> <pre><code>+------------+      +--------+      +---------------+\n| Your input | -+-&gt; |  iree  | -+-&gt; | IREE compiler |\n+------------+  |   +--------+  |   +---------------+\n                |   +--------+  |\n                +-&gt; | linalg | -+\n                |   +--------+  |\n                |      ....     |\n</code></pre> <p>The easiest, cleanest, and most robust path to extend IREE is to make use of what MLIR is designed for: composing dialects and converting between them. IREE supports several input dialects such as <code>tosa</code>, <code>mhlo</code>, <code>linalg</code>, and the standard <code>arith</code>, <code>math</code>, <code>tensor</code>, and <code>scf</code> dialects. Any source IR that can be turned into that mix of dialects (directly or transitively) will work with the whole IREE pipeline for all deployment configurations and targets. 
If possible to express the computation in this form it will always be the best route to getting small deployments without the need to modify or include any additional code at runtime and run on all device types and execution modes.</p> <p>This mechanism can also be layered with any of the subsequent lower-level ones: if some part of the operation runs on the host and some part on device then decomposing it such that it contains as many standard ops for flow control as possible and linear algebra/custom ops for the dense math will reduce the engineering effort required on both sides and lead to an easier to maintain solution even if lower-level extension is required.</p> <p>A large majority of classic ML \"custom ops\" can be accomplished with this approach. When bringing up projects built on IREE it's best to concisely describe the operation in more elemental mathematical representations and then add optimizations where required knowing that things will still work even if those optimizations never happen.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros","title":"Pros","text":"<ul> <li>No IREE compiler or runtime code changes required.<ul> <li>Can use standard IREE packaged releases and tools.</li> <li>No versioning issues at runtime.</li> </ul> </li> <li>IREE's host/device partitioning can partition your code.</li> <li>Fusion and other compiler techniques (CSE/DCE/inlining/etc) work on your code.</li> <li>All target backends (CPU/GPU/accelerators/enclaves/etc) work.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons","title":"Cons","text":"<ul> <li>Input dialects cannot natively represent all possible programs (such as file   IO and other syscalls).</li> <li>Performance-sensitive host code (b-trees and other in-memory databases) will   run through the slower VM paths if not authored as dense compute.</li> 
</ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use","title":"When to use","text":"<ul> <li> Targeting multiple MLIR toolchains of which IREE is just   one (as little to no IREE-specific code is required).</li> <li> Operation represents host code in addition to device code.</li> <li> All code is known statically or symbolically at   compile-time (instead of independently versioned libraries at runtime).</li> <li> Complex high-performance code not representable as linear algebra.</li> <li> External runtime interactions (file/network/user IO). Use   custom modules.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation","title":"Implementation","text":"<p>To make use of this approach one just needs to follow the standard MLIR dialect conversion behavior: add a dialect with ops, add a conversion pass, and run that pass before providing the resulting IR to the IREE compiler. See Creating a Dialect.</p> <p>Think of this like authoring C++ sources with templates that you compile into your application: Clang (and LLVM beyond) don't know about your library details and instead just process it as it would any other code. You can take the same source and pass it to GCC and it'll be robust to underlying changes in the system.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#2-extend-host-code-with-custom-modules","title":"2. 
Extend host code with custom modules","text":"<p>TL;DR</p> <p>Import MLIR functions in the compiler and custom modules at runtime.</p> <pre><code>// Main user module compiled by IREE:\nmodule @model {\n  // Declare a synchronous external function:\n  func.func private @my_custom_module.sync_func(%input: tensor&lt;?xf32&gt;) -&gt; i32\n  // Declare an asynchronous external function:\n  func.func private @my_custom_module.async_func(%input: tensor&lt;?xf32&gt;) -&gt; tensor&lt;?xf32&gt; attributes {\n    iree.abi.model = \"coarse-fences\",\n    nosideeffects\n  }\n  func.func @predict() {\n    ...\n    // Call a synchronous/blocking external function:\n    %sync_result = call @my_custom_module.sync_func(%sync_input) : (tensor&lt;?xf32&gt;) -&gt; i32\n    ...\n    ...\n    // Call an asynchronous/non-blocking external function:\n    %async_result = call @my_custom_module.async_func(%async_input) : (tensor&lt;?xf32&gt;) -&gt; tensor&lt;?xf32&gt;\n    ...\n  }\n}\n</code></pre> <p>IREE provides dynamic linking at runtime via its VM interfaces. For code that runs on the host and requires syscalls or calling out to existing libraries - such as file IO, text processing, and JPEG decoding - this is an easy way to interop without paying attention to the more complex details of device code. An IREE module compiled using custom modules is portable and dynamically deployable so long as the custom module is registered at runtime.</p> <p>This approach conceptually matches what normal native binaries do in an OS: imports are declared and at runtime they are resolved based on the available exports of modules in the system. Just as with normal systems engineering design of the API between modules is up to the user and depending on rigor can have several pitfalls but these problems and their solutions are not IREE specific and anyone who has designed a shared library interface can apply the same rules here in IREE around versioning, performance, etc. 
One does not add 2 integers via a syscall and the same holds here: custom modules and the functions within should perform a large amount of work to hide overheads involved in the cross-module calls and users must be aware that the compiler cannot optimize across the call boundaries.</p> <p>See the synchronous tensor I/O and asynchronous tensor I/O samples.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_1","title":"Pros","text":"<ul> <li>No IREE compiler code changes required.</li> <li>Produced artifacts are portable across IREE deployment configurations.</li> <li>Full system access is allowed - the VM just calls external functions.</li> <li>Runtime modules can be implemented (via shims) in other languages/runtimes.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_1","title":"Cons","text":"<ul> <li>Custom modules must be registered at runtime by the user.</li> <li>The VM custom module ABI goo must be authored by the user (such as with JNI or   pybind to move between java/python and C).</li> <li>All custom module code must be compiled and deployed regardless of how much   any modules use. 
The granularity of modules and their versioning is up to the   user.</li> <li>Custom module code cannot be optimized by the IREE compiler to avoid   host/device readbacks and unnecessary data type conversion.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_1","title":"When to use","text":"<ul> <li> Interactions with large libraries or system calls.</li> <li> Performance-sensitive host code that cannot easily be   represented as device code (like UTF-8 string transformation using libicu).</li> <li> Extensively using tensor resources.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_1","title":"Implementation","text":"<p>The runtime portion requires that the code be exported to the VM system by way of an <code>iree_vm_module_t</code> interface. A low-level native interface exists with minimal overhead and is used for example by the IREE HAL itself. There is also a C++ wrapper that is significantly easier to work with however it needs some performance improvements.</p> <p>Full end-to-end examples can be found under <code>samples/custom_modules/</code>:</p> <ul> <li>The basic sample shows how to add VM modules with custom types and take advantage of ABI features like fallback functions and optional imports.</li> <li>The synchronous tensor I/O sample shows a call taking and returning a tensor and performing blocking work.</li> <li>The asynchronous tensor I/O sample shows the same thing but with fences for asynchronous scheduling.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#3-extend-target-specific-device-conversion-patterns","title":"3. 
Extend target-specific device conversion patterns","text":"<p>TL;DR</p> <p>Add patterns to <code>iree/Compiler/Codegen/</code> to emit target code.</p> <p>The easiest and most robust path for specializations of device code is to emit such code mixed with the IREE compiler generated code at the highest possible level of abstraction within the target pipeline. For example, if the code can be represented with the <code>vector</code> dialect then inserting conversion patterns between <code>linalg</code> and <code>vector</code> enables the emitted code to be specialized further based on user configuration and optimized with the full set of available passes that run in the pipeline. For each level lower one goes the more flexibility they gain such as being able to emit inline assembly blocks that do anything while trading off generality and multi-targeting applicability.</p> <p>How much the tradeoff matters is based on the behavior of the extension. If a pattern changing a transcendental function to an approximation can operate at the vector level then all IREE deployment targets can benefit from the pattern and as new targets are made available they will automatically receive the benefits. In contrast, a pattern at the vector level that turns generic vector operations into architecture-specific LLVM intrinsics by its nature only pertains to a single target family and can be done at a lower level. As a rule of thumb if a particular pattern is going to need ~N implementations for ~N targets that are all mostly the same it's better to try to move that higher in the stack.</p> <p>At this point the complexity of extending things is still fairly constrained: a C++ pass or pattern is verified with normal lit tests and can be upstreamed easily either into MLIR or IREE (a large number of IREE patterns are upstreamed, benefiting all users of MLIR). 
Cross-compilation and versioning are not a factor and the IREE artifacts can be considered durable at a coarse level (outside of major target architectural changes).</p> <p>Note that depending on the target there are various mechanisms for representing code in MLIR, up to including inline assembly snippets in IR via <code>llvm.inline_asm</code>.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_2","title":"Pros","text":"<ul> <li>Not limited to what is possible to represent in any particular MLIR dialect.</li> <li>Rich target configuration available; multiple passes can contribute info.</li> <li>Produced executable binaries are hermetic and no runtime changes are required.</li> <li>Specialization can happen in MLIR dialects like <code>linalg</code> or <code>vector</code> as well   as target-specific representations like SPIR-V and LLVM IR.</li> <li>The compiler can perform deep optimizations across both the generated code and   the provided code (hoisting/loop invariant code motion/cse/etc).</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_2","title":"Cons","text":"<ul> <li>Requires implementing the patterns as code in the IREE compiler or via TBD   interfaces.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_2","title":"When to use","text":"<ul> <li> Code that must be emitted during target lowering - such as   something optimizing for a particular CPU architecture.</li> <li> Hot code mixed with generated code at a fine granularity   (within the innermost loop).</li> <li> External existing hand-authored libraries. Either statically   or dynamically link instead.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_2","title":"Implementation","text":"<p>There are several ways to author patterns and passes in MLIR. 
As examples:</p> <ul> <li>A majority of patterns are authored in C++ using PatternRewriter.</li> <li>PDL is an MLIR-based way to   express rewrite operations with strong typing, compile-time verification, and   easily-readable and less-verbose IR.</li> <li><code>linalg</code> uses a python-based DSL   for defining some of its extended ops.</li> </ul> <p>There are many examples within both MLIR and IREE, one specifically being the polynomial approximation expansion patterns.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#4-include-external-target-specific-device-code","title":"4. Include external target-specific device code","text":"<p>TL;DR</p> <p>Statically link external object files into IREE executables.</p> <p>For large bodies of existing device code or library calls that are available for static linkage the work involved to reimplement them at higher levels of the stack can be cost prohibitive even if it leads to better results. In these cases just as with a normal toolchain one would just want to declare an external function, call it, and add the object file to the linker command line. In IREE the same can be performed by way of taking compatible bitcode or native object files and linking them in with the generated code. An MLIR pattern would declare and emit the call and the target-specific IREE linker would pull in the objects.</p> <p>As the linking behavior varies per target (for example, some targets like SPIR-V don't have traditional linkers) how this is performed is up to the IREE target backends. The complexity involved in producing the object files to link will also vary per-backend and the complexity of the deployment: cross-compiling for multiple architectures or compilation modes (ASAN, etc) will require unique copies of the object files matching that precise configuration.</p> <p>At this point generality is largely out as is the ability to cleanly upstream such files. 
It should be apparent how a few dozen lines of C++ or PDL that avoids the need for any of this complexity is more appealing. In extremely specific cases of a single platform/architecture/version for a single program deployed via a specific artifact composition it's not so bad but IREE is designed such that extreme specificity is an optional mode of the more general solution. This does not mean this mechanism is not useful in some situations and only that it should be a last-resort when one of the easier to manage solutions is not viable - not a shortcut to avoid writing some C++ patterns.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_3","title":"Pros","text":"<ul> <li>Works with hand-authored code in compatible object files from any toolchain.</li> <li>No IREE runtime changes required.<ul> <li>All deployment modes still work, including multi-targeting.</li> <li>No versioning concerns as custom code is included in artifacts.</li> </ul> </li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_3","title":"Cons","text":"<ul> <li>Users must provide per-target precompiled object files on disk.</li> <li>IREE compiler changes are still needed for generating the external calls.</li> <li>Though LTO may be able to optimize across the calls it is not guaranteed.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_3","title":"When to use","text":"<ul> <li> Existing math libraries or architecture-specific functions   that cannot be ported into a more MLIR-friendly form.</li> <li> Mixing in hand-authored code written in C/rust/etc with   generated code from MLIR.</li> <li> External code can be represented as either <code>linalg</code>,   <code>vector</code>, or LLVM IR. Use target-specific conversion patterns instead.</li> <li> External code size is large and unlikely to benefit from   link-time optimizations (such as something like libjpeg). 
Dynamically link   instead.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_3","title":"Implementation","text":"<p>As the linking behavior varies per target backend there is no general solution at this level: if targeting the CPU then the system native linker or lld need to be provided the object files, while SPIR-V will need to merge the SPIR-V binaries directly, and Metal shader libraries will need to be constructed with the Apple-specific <code>metallib</code> tooling. Producing these files and performing the linking is outside the scope of IREE.</p> <p>If the files can be acquired then compiler changes will be required to emit calls to them and invoke the linker with the files.</p> <p>On the CPU an alternative is to use the static library output mode where IREE produces an object file and then the user invokes the linker themselves; this still requires the compiler changes to emit the calls but avoids needing to teach the compiler how to link the files.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#5-dynamically-link-target-specific-device-code-cpu-only","title":"5. Dynamically link target-specific device code (CPU only)","text":"<p>TL;DR</p> <p>Dynamically link external C functions at runtime from device code.</p> <p>It is pitch black. You are likely to be eaten by a grue.</p> <p>This is the lowest-level integration in the system and is designed to act as an escape hatch and - as with any emergency escape hatch - it's not designed for ergonomics. Users should try first to come in through the door and attempting to use this mechanism should trigger alarms about the approach being attempted.</p> <p>IREE's execution model for device code and native machine binary deployment mechanisms are designed with several constraints in order to make all of the above approaches possible and performant. 
Calling arbitrary C functions from deep within the system can introduce subtle (and not-so-subtle) bugs that are extremely difficult to track down and versioning between the compiler emitting the calls and the runtime providing the implementations can cause skew unless held carefully. Consider the methods added here like syscalls in that they must be extremely focused and if they are ever likely to change (including being removed) then care will be needed just as with versioning or redirecting a syscall. Designing good stable interfaces is hard and a classic pit of failure.</p> <p>Some things to note:</p> <ul> <li>Device code executes in a tiled fashion and single dispatches may invoke the   same function many times from many threads concurrently to perform   the larger work.</li> <li>Tiles may execute in any order and on any thread; performing fine-grained   locking within the tile can lead to deadlocks.</li> <li>Device code is stateless in order to allow for access restrictions and caching   across multiple loaded models - any library state required must be externally   managed via process globals.</li> <li>Device code may be running out-of-process (sandbox/enclave) and the library   functions must be available where the dispatches run and not where they are   launched (such as being linked into the sandbox binary, if separate from the   main process binary).</li> <li>The stack must be used to pass arguments/results to external calls via a   single pointer and there is no libffi-like functionality for magically calling   arbitrary C functions. 
Users must provide the shims they need.</li> <li>Thread-local storage is unavailable in the called code (it may be usable, but   it is not guaranteed it'll work on all platforms and leaks are likely).</li> <li>No heap allocator is provided and the use of libc malloc is unsupported.</li> </ul> <p>Most of the constraints here come from the SPMD parallelism model, platform-agnostic deployment format, and overall data-oriented design of IREE. Code operating in this fashion has a certain shape and that is usually not the same as big legacy single-threaded CPU-focused BLAS libraries that perform their own caching, internal thread and state management, and other shenanigans. IREE is not designed to wrap such things and if any of these notes are issues it is more an indicator that the approach needs adjustment than anything else. Trying to bypass or work around the constraints is possible - after all IREE is an open source project and any user is welcome to fork it - but unsupported by the core IREE team.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_4","title":"Pros","text":"<ul> <li>Function resolution at runtime is orthogonal to compiler target specification.</li> <li>Machine code can be shared between the application and IREE artifacts.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_4","title":"Cons","text":"<ul> <li>IREE compiler and runtime must both be modified.</li> <li>Deeper integration with the IREE codegen compiler infrastructure required.</li> <li>ABI versioning complexity between compiler and runtime.</li> <li>Runtimes must ship the imports for the lifetime of any artifact compiled to   use them.<ul> <li>Humans are bad at predicting the future.</li> <li>Using the same artifact in different binaries at runtime requires changes   to each binary - including those that may not be owned by the person   producing the artifact.</li> <li>Weak imports and conditional usage can help but 
still lead to bloat.</li> </ul> </li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_4","title":"When to use","text":"<ul> <li> Calling into opaque closed-source BLAS-like microkernel   libraries.</li> <li> Any other cases covered above can be used, especially   microkernels that can be represented in MLIR or as statically linked   libraries.</li> </ul>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_4","title":"Implementation","text":"<p>The compiler is changed to produce calls to imports via a dynamic import table provided to each dispatch function. The import table is declared in the executable library for use at runtime. Runtime applications register an import provider to resolve named symbols in the import table to C functions that marshal arguments and results.</p> <p>The compiler-side needs some additional work but an example is included here: Issue 7504. The runtime-side is complete and resolution is performed by a user-supplied <code>iree_hal_executable_import_provider_t</code>.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/","title":"Glossary","text":"<p>IREE exists in an ecosystem of projects and acts as a bridge between machine learning frameworks and a variety of hardware platforms. This glossary outlines some of those projects and technologies.</p> <p>Something missing?</p> <p>Don't see a project or technology here that you think should be? 
We welcome contributions on our GitHub page!</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#jax","title":"JAX","text":"<p>JAX is a Python framework supporting high-performance machine learning research by bridging automatic differentiation and ML compilers like XLA and IREE.</p> <p>See the JAX Integration guide for details on how to use JAX programs with IREE.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#mlir","title":"MLIR","text":"<p>Multi-Level Intermediate Representation (MLIR) is the compiler framework that IREE is built around. Beyond the tooling this includes a set of common dialects and transformations that IREE utilizes for its code generation system.</p> <p>For general discussion on MLIR see the project's discourse forum.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#linalg","title":"Linalg","text":"<p>Linalg is an MLIR dialect that defines Linear Algebra operations in a generalized fashion by modeling iteration spaces together with compute payloads. Linalg includes a set of commonly used operations as well as generic interfaces.</p> <p>IREE uses the Linalg dialect during its code generation pipeline to define tensor operations then generate loop structures for its various backend targets.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#openxla","title":"OpenXLA","text":"<p>OpenXLA is a community-driven, open source ML compiler ecosystem.</p> <p>IREE interfaces with some of the OpenXLA projects, such as StableHLO.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#pytorch","title":"PyTorch","text":"<p>PyTorch is an optimized tensor library for deep learning.</p> <p>PyTorch uses the Torch-MLIR project to interface with projects like IREE. 
See the PyTorch Integration guide for details on how to use PyTorch programs with IREE.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#spir-v","title":"SPIR-V","text":"<p>SPIR-V is a shader and kernel intermediate language for expressing parallel computation typically used for GPUs. It serves as a hardware agnostic assembly format for distributing complex, computationally intensive programs.</p> <p>IREE uses the SPIR-V MLIR Dialect in its code generation pipeline for Vulkan and other compute APIs.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#stablehlo","title":"StableHLO","text":"<p>StableHLO is a set of versioned high-level operations (HLOs) for ML models with backward and forward compatibility guarantees. StableHLO aims to improve interoperability between frameworks (such as TensorFlow, JAX, and PyTorch) and ML compilers.</p> <p>StableHLO has both a specification and an MLIR dialect.</p> <p>IREE uses the StableHLO MLIR Dialect as one of its input formats.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tosa","title":"TOSA","text":"<p>Tensor Operator Set Architecture (TOSA) provides a set of tensor operations commonly employed by Deep Neural Networks. TOSA defines accuracy and compatibility constraints so frameworks that use it can trust that applications will produce similar results on a variety of hardware targets.</p> <p>TOSA has both a specification and an MLIR dialect.</p> <p>IREE uses the TOSA MLIR dialect as one of its input formats.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tflite","title":"TFLite","text":"<p>TensorFlow Lite (TFLite) is a library for deploying models on mobile and other edge devices.</p> <p>IREE supports running TFLite programs that have been imported into MLIR using the TOSA dialect. 
See the TFLite Integration guide for details on how to use TFLite programs with IREE.</p> <p>IREE also has bindings for the TFLite C API, see the <code>runtime/bindings/tflite/</code> directory for details.</p>","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/optimization-options/","title":"Optimization options","text":"<p>This page documents various supported flags for optimizing IREE programs. Each is presented with its English name, flag to enable/disable, and default state.</p> <p>These flags can be passed to the:</p> <ul> <li><code>iree-compile</code> command line tool</li> <li><code>extra_args=[\"--flag\"]</code> argument to <code>iree.compiler.tools</code> Python wrappers</li> <li>In-process Python compiler API   <code>iree.compiler.transforms.iree-compile.CompilerOptions(\"--flag\", \"--flag2\")</code>   constructor</li> <li><code>ireeCompilerOptionsSetFlags()</code> compiler C API function</li> </ul>"},{"location":"reference/optimization-options/#high-level-program-optimizations","title":"High level program optimizations","text":""},{"location":"reference/optimization-options/#constant-evaluation-iree-opt-const-eval-on","title":"Constant evaluation (<code>--iree-opt-const-eval</code> (on))","text":"<p>Performs compile-time evaluation of any global initializers which produce the initial values for global constants, storing the global directly in the program as constant data. This extracts such constant program fragments and recursively compiles them, using the runtime to evaluate the results.</p> <p>Note that this only has any effect on computations in module initializer functions, not free-standing operations in the program which may produce constant-derived results. 
See <code>--iree-opt-const-expr-hoisting</code> for options to optimize these.</p>"},{"location":"reference/optimization-options/#constant-expression-hoisting-iree-opt-const-expr-hoisting-off","title":"Constant expression hoisting (<code>--iree-opt-const-expr-hoisting</code> (off))","text":"<p>Identifies all trees of constant expressions in the program and uses a heuristic to determine which would be profitable to hoist into global initializers for evaluation at module load. Together with <code>--iree-opt-const-eval</code>, this will convert eligible trees of expressions to purely static data embedded in the module.</p> <p>The heuristic is currently relatively primitive, using static information to disable hoisting of leaf operations which are metadata only (i.e. broadcasts, etc) or are expected to fold away as part of operator fusion. Notably, the current heuristic is likely to pessimize module size in the case of complicated programs with trees of constant, large tensors.</p>"},{"location":"reference/optimization-options/#numeric-precision-reduction-iree-opt-numeric-precision-reduction-off","title":"Numeric precision reduction (<code>--iree-opt-numeric-precision-reduction</code> (off))","text":"<p>Analyzes program constant data and program flow to identify math operations which can be safely evaluated with reduced precision (currently with a minimum of 8bit integers but being extended to infer any bit depth) and inserts appropriate casts. 
In conjunction with Constant Expression Hoisting, Constant Evaluation and other automatic optimizations, this can produce programs where large amounts (up to the whole) have had their numeric operations and constant data rewritten to lower precision types.</p> <p>This feature is actively evolving and will be the subject of dedicated documentation when ready.</p>"},{"location":"reference/optimization-options/#strip-debug-assertions-iree-opt-strip-assertions-off","title":"Strip Debug Assertions (<code>--iree-opt-strip-assertions</code> (off))","text":"<p>Strips all <code>std.assert</code> ops in the input program after useful information for optimization analysis has been extracted. Assertions provide useful user-visible error messages but can prevent critical optimizations. Assertions are not, however, a substitution for control flow and frontends that want to check errors in optimized release builds should do so via actual code - similar to when one would <code>if (foo) return false;</code> vs. <code>assert(foo);</code> in a normal program.</p>"},{"location":"reference/bindings/","title":"API bindings","text":"<p>API bindings allow for programmatic use of IREE's compiler and runtime components. The core IREE project is written in C<sup>1</sup>, allowing for API bindings to be written in a variety of other languages.</p> <p>Something missing?</p> <p>Want to use another language? Looking for something specific out of one of those already listed?</p> <p>We welcome discussions on our communication channels and contributions on our GitHub page!</p>"},{"location":"reference/bindings/#official-api-bindings","title":"Official API bindings","text":"<p>Members of the core project team and other partner groups maintain these official bindings:</p> Language Compiler API? Runtime API? Published packages? 
C/C++  Supported  Supported  Unsupported Python  Supported  Supported  Supported"},{"location":"reference/bindings/#cc","title":"C/C++","text":"<p>See the C API reference page.</p>"},{"location":"reference/bindings/#python","title":"Python","text":"<p>See the Python reference page.</p>"},{"location":"reference/bindings/#unofficial-and-experimental-api-bindings","title":"Unofficial and experimental API bindings","text":"<p>Members of our developer community have authored bindings using other languages:</p> Language Compiler API? Runtime API? Published packages? JavaScript  Experimental  Experimental  Unsupported Java  Unsupported  Experimental  Unsupported Julia  Experimental  Experimental  Unsupported Rust  Unsupported  Experimental  Experimental"},{"location":"reference/bindings/#javascript","title":"JavaScript","text":"<ul> <li>JavaScript bindings for WebAssembly and WebGPU are under development in IREE's <code>experimental/web/</code> directory.</li> </ul>"},{"location":"reference/bindings/#java","title":"Java","text":"<ul> <li>Java TFLite bindings were developed at one point in IREE's <code>runtime/bindings/tflite/java</code> directory.</li> </ul>"},{"location":"reference/bindings/#julia","title":"Julia","text":"<ul> <li>Coil.jl is an experimental package to lower and execute Julia tensor operations to IREE.</li> </ul>"},{"location":"reference/bindings/#rust","title":"Rust","text":"<ul> <li>iree-rs is a crate containing rustic bindings for the IREE runtime.</li> </ul> <ol> <li> <p>with some C++ tools and utilities\u00a0\u21a9</p> </li> </ol>"},{"location":"reference/bindings/c-api/","title":"C API bindings","text":""},{"location":"reference/bindings/c-api/#overview","title":"Overview","text":"<p>The IREE compiler and IREE runtime both have their own C/C++ APIs. 
This page introduces the available APIs and describes how to use them from your applications.</p> <p>Note</p> <p>There are multiple ways to distribute and depend on C/C++ projects, each with varying levels of portability, flexibility, and toolchain compatibility. IREE aims to support common configurations and platforms.</p>"},{"location":"reference/bindings/c-api/#compiler-api","title":"Compiler API","text":"<p>The IREE compiler is structured as a monolithic shared object with a dynamic plugin system allowing for extensions. The shared object exports symbols for versioned API functions.</p> <pre><code>graph TD\n  accTitle: IREE compiler linkage model diagram\n  accDescr {\n    The libIREECompiler.so or IREECompiler.dll shared object contains pipelines,\n    target backends, and general passes as private implementation details.\n    Compiler plugins interface with the compiler shared object to extend it with\n    custom targets, dialects, etc.\n    Applications interface with the compiler shared object through the compiler\n    C API's exported symbols.\n  }\n\n  subgraph compiler[libIREECompiler.so / IREECompiler.dll]\n    pipelines(\"Pipelines\n\n    \u2022 Flow\n    \u2022 Stream\n    \u2022 etc.\")\n\n    targets(\"Target backends\n\n    \u2022 llvm-cpu\n    \u2022 vulkan-spirv\n    \u2022 etc.\")\n\n    passes(\"General passes\n\n    \u2022 Const eval\n    \u2022 DCE\n    \u2022 etc.\")\n  end\n\n  plugins(\"Compiler plugins\n\n    \u2022 Custom targets\n    \u2022 Custom dialects\n    \u2022 etc.\")\n\n  application(Your application)\n\n  compiler &lt;-- \"Plugin API&lt;br&gt;(static or dynamic linking)\" --&gt; plugins\n  compiler -. 
\"Compiler C API&lt;br&gt;(exported symbols)\" .-&gt; application</code></pre> <p>API definitions can be found in the following locations:</p> Source location Overview <code>iree/compiler/embedding_api.h</code> Top-level IREE compiler embedding API <code>iree/compiler/PluginAPI/</code> directory IREE compiler plugin API <code>mlir/include/mlir-c/</code> directory MLIR C API headers"},{"location":"reference/bindings/c-api/#concepts","title":"Concepts","text":"<p>The compiler API is centered around running pipelines to translate inputs to artifacts. These are modeled via sessions, invocations, sources, and outputs.</p> <pre><code>stateDiagram-v2\n  accTitle: IREE compiler session and invocation state diagram\n  accDescr {\n    Input files are opened (or buffers are wrapped) as sources in a session.\n    Sources are parsed into invocations, which run pipelines.\n    Output files are written (or buffers are mapped) for compilation artifacts.\n    Sessions can contain multiple sources and run multiple invocations.\n  }\n\n  direction LR\n  InputFile --&gt; Source1 : open file\n  InputBuffer --&gt; Source2 : wrap buffer\n\n  state Session {\n    Source1 --&gt; Invocation1\n    Source2 --&gt; Invocation2\n    Invocation1 --&gt; Invocation1 : run pipeline\n    Invocation2 --&gt; Invocation2 : run pipeline\n  }\n\n  Invocation1 --&gt; Output1File   : write file\n  Invocation1 --&gt; Output1Buffer : map memory\n  Invocation2 --&gt; Output2Buffer : map memory</code></pre>"},{"location":"reference/bindings/c-api/#sessions","title":"Sessions","text":"<p>A session (<code>iree_compiler_session_t</code>) is a scope where one or more invocations can run.</p> <ul> <li>Internally, sessions consist of an <code>MLIRContext</code> and a private set of   options.</li> <li>Sessions may activate available plugins based on their options.</li> </ul>"},{"location":"reference/bindings/c-api/#invocations","title":"Invocations","text":"<p>An invocation (<code>iree_compiler_invocation_t</code>) 
is a discrete run of the compiler.</p> <ul> <li>Invocations run pipelines, consisting of passes, to translate from   sources to outputs.</li> </ul>"},{"location":"reference/bindings/c-api/#sources","title":"Sources","text":"<p>A source (<code>iree_compiler_source_t</code>) represents an input program, including operations and data.</p> <ul> <li>Sources may refer to files or buffers in memory.</li> </ul>"},{"location":"reference/bindings/c-api/#outputs","title":"Outputs","text":"<p>An output (<code>iree_compiler_output_t</code>) represents a compilation artifact.</p> <ul> <li>Outputs can be standalone files or more advanced streams.</li> </ul>"},{"location":"reference/bindings/c-api/#plugins","title":"Plugins","text":"<p>A plugin extends the compiler with some combination of target backends, options, passes, or pipelines. For documentation on compiler plugins, see <code>compiler/PluginAPI/README.md</code>.</p>"},{"location":"reference/bindings/c-api/#usage","title":"Usage","text":"<p>This snippet shows the general layout of the API. 
For working examples, see the samples below.</p> <p>To build a custom tool using the compiler API:</p> CMakeLists.txt<pre><code>set(_IREE_COMPILER_API \"${_IREE_COMPILER_ROOT}/bindings/c/iree/compiler\")\ntarget_include_directories(${_NAME} SYSTEM PRIVATE ${_IREE_COMPILER_API})\ntarget_link_libraries(${_NAME} iree_compiler_bindings_c_loader)\n</code></pre> iree_compiler_demo.c<pre><code>#include &lt;iree/compiler/embedding_api.h&gt;\n#include &lt;iree/compiler/loader.h&gt;\n\nint main(int argc, char** argv) {\n  // Load the compiler library then initialize it.\n  ireeCompilerLoadLibrary(\"libIREECompiler.so\");\n  ireeCompilerGlobalInitialize();\n\n  // Create a session to track compiler state and set flags.\n  iree_compiler_session_t *session = ireeCompilerSessionCreate();\n  ireeCompilerSessionSetFlags(session, argc, argv);\n\n  // Open a file as an input source to the compiler.\n  iree_compiler_source_t *source = NULL;\n  ireeCompilerSourceOpenFile(session, \"input.mlir\", &amp;source);\n\n  // Use an invocation to compile from the input source to one or more outputs.\n  iree_compiler_invocation_t *inv = ireeCompilerInvocationCreate(session);\n  ireeCompilerInvocationPipeline(inv, IREE_COMPILER_PIPELINE_STD);\n\n  // Output the compiled artifact to a file.\n  iree_compiler_output_t *output = NULL;\n  ireeCompilerOutputOpenFile(\"output.vmfb\", &amp;output);\n  ireeCompilerInvocationOutputVMBytecode(inv, output);\n\n  // Cleanup state.\n  ireeCompilerInvocationDestroy(inv);\n  ireeCompilerOutputDestroy(output);\n  ireeCompilerSourceDestroy(source);\n  ireeCompilerSessionDestroy(session);\n  ireeCompilerGlobalShutdown();\n}\n</code></pre>"},{"location":"reference/bindings/c-api/#samples","title":"Samples","text":"Project Source Description iree-org/iree-template-compiler-cmake <code>hello_compiler.c</code> Compiler application template iree-org/iree <code>integrations/pjrt/.../iree_compiler.cc</code> JIT for TensorFlow + JAX to IREE iree-org/iree 
<code>compiler/plugins</code> In-tree supported compiler plugins iree-org/iree <code>samples/compiler_plugins/</code> In-tree sample compiler plugins nod-ai/iree-amd-aie <code>plugins/.../iree-amd-aie</code> Early-phase plugins for interfacing with AMD AIE accelerators"},{"location":"reference/bindings/c-api/#runtime-api","title":"Runtime API","text":"<p>The IREE runtime is structured as a modular set of library components. Each component is designed to be linked into applications directly and compiled with LTO style optimizations.</p> <p>The low level library components can be used directly or through a higher level API.</p> High level APILow level API <p>The high level 'runtime' API sits on top of the low level components. It is relatively terse but does not expose the full flexibility of the underlying systems.</p> <pre><code>graph TD\n  accTitle: IREE runtime high level API diagram\n  accDescr {\n    The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n    their own types and API methods.\n    A high level \"runtime API\" sits on top of these component APIs.\n    Applications can interface indirectly with the IREE runtime via this\n    high level runtime API.\n  }\n\n  subgraph iree_runtime[IREE Runtime]\n    subgraph base\n      base_types(\"Types\n\n      \u2022 allocator\n      \u2022 status\n      \u2022 etc.\")\n    end\n\n    subgraph hal[HAL]\n      hal_types(\"Types\n\n      \u2022 buffer\n      \u2022 device\n      \u2022 etc.\")\n\n      hal_drivers(\"Drivers\n\n      \u2022 local-*\n      \u2022 vulkan\n      \u2022 etc.\")\n    end\n\n    subgraph vm[VM]\n      vm_types(\"Types\n\n      \u2022 context\n      \u2022 invocation\n      \u2022 etc.\")\n    end\n\n    runtime_api(\"Runtime API\n\n    \u2022 instance\n    \u2022 session\n    \u2022 call\")\n\n    base_types &amp; hal_types &amp; hal_drivers &amp; vm_types --&gt; runtime_api\n  end\n\n  application(Your application)\n\n  runtime_api --&gt; application</code></pre> 
<p>Each runtime component has its own low level API. The low level APIs are typically verbose as they expose the full flexibility of each underlying system.</p> <pre><code>graph TD\n  accTitle: IREE runtime low level API diagram\n  accDescr {\n    The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n    their own types and API methods.\n    Applications can interface directly with the IREE runtime via the low\n    level component APIs.\n  }\n\n  subgraph iree_runtime[IREE Runtime]\n    subgraph base\n      base_types(\"Types\n\n      \u2022 allocator\n      \u2022 status\n      \u2022 etc.\")\n    end\n    subgraph hal[HAL]\n      hal_types(\"Types\n\n      \u2022 buffer\n      \u2022 device\n      \u2022 etc.\")\n\n      hal_drivers(\"Drivers\n\n      \u2022 local-*\n      \u2022 vulkan\n      \u2022 etc.\")\n    end\n    subgraph vm[VM]\n      vm_types(\"Types\n\n      \u2022 context\n      \u2022 invocation\n      \u2022 etc.\")\n    end\n  end\n\n  application(Your application)\n\n  base_types &amp; hal_types &amp; hal_drivers &amp; vm_types --&gt; application</code></pre> <p>Runtime API header files are organized by component:</p> Component header file Overview <code>iree/runtime/api.h</code> High level runtime API <code>iree/base/api.h</code> Core API, type definitions, ownership policies, utilities <code>iree/vm/api.h</code> VM APIs: loading modules, I/O, calling functions <code>iree/hal/api.h</code> HAL APIs: device management, synchronization, accessing hardware features"},{"location":"reference/bindings/c-api/#high-level-concepts","title":"High level concepts","text":"<p>The high level API uses instances, sessions, and calls to run programs with a small API surface.</p> <pre><code>stateDiagram-v2\n  accTitle: IREE runtime high level API state diagram\n  accDescr {\n    Instances track sessions and state: options, drivers, devices.\n    Sessions track calls and state: a device and bytecode/VM modules.\n    Calls track input and output 
lists.\n  }\n\n  state iree_runtime_instance_t {\n    instance_state: state&lt;br&gt;- options&lt;br&gt;- drivers&lt;br&gt;- devices\n\n    state iree_runtime_session_t {\n      session_state: state&lt;br&gt;- device&lt;br&gt;- VM / bytecode modules\n      state iree_runtime_call_t  {\n        inputs\n        outputs\n      }\n    }\n  }</code></pre>"},{"location":"reference/bindings/c-api/#instance","title":"Instance","text":"<p>An instance (<code>iree_runtime_instance_t</code>) isolates runtime usage and manages device resources.</p> <ul> <li>Instances may service multiple sessions to avoid extra device interaction   and reuse caches/pools.</li> <li>Separate instances are isolated/sandboxed from one another.</li> </ul>"},{"location":"reference/bindings/c-api/#session","title":"Session","text":"<p>A session (<code>iree_runtime_session_t</code>) contains a set of loaded modules and their state.</p> <ul> <li>Sessions that share an instance may share resources directly.</li> <li>Sessions that do not share an instance can transfer resources using   import and export APIs.</li> </ul>"},{"location":"reference/bindings/c-api/#call","title":"Call","text":"<p>A call (<code>iree_runtime_call_t</code>) is a stateful VM function call builder.</p> <ul> <li>Calls can be reused to avoid having to construct input lists for each   invocation.</li> </ul>"},{"location":"reference/bindings/c-api/#low-level-concepts","title":"Low level concepts","text":""},{"location":"reference/bindings/c-api/#base","title":"Base","text":"<p>Under construction, more coming soon</p>"},{"location":"reference/bindings/c-api/#vm","title":"VM","text":"<p>IREE uses its own Virtual Machine (VM) at runtime to interpret program instructions on the host system.</p> Tip - EmitC alternate lowering path <p>VM instructions may be further lowered to C source code for static or resource constrained deployment.</p> <p>See the <code>--output-format=vm-c</code> compiler option and the samples in 
<code>samples/emitc_modules/</code> for more information.</p> <p>The VM supports generic operations like loads, stores, arithmetic, function calls, and control flow. The VM builds streams of more complex program logic and dense math into HAL command buffers that are dispatched to hardware backends.</p> <ul> <li>VM instances can serve multiple isolated execution contexts.</li> <li>VM contexts are effectively sandboxes for loading modules and running   programs.</li> <li> <p>VM modules provide all functionality to execution contexts, including   access to hardware accelerators through the HAL. Compiled user programs are   also modules.</p> <pre><code>stateDiagram-v2\n  accTitle: Sample VM Modules\n  accDescr {\n    Bytecode modules contain program state, program functions, and debug\n    information.\n    HAL modules contain devices, executables, HAL functions, and HAL types.\n    Custom modules may contain external functions and custom types.\n  }\n\n  state \"Bytecode module\" as bytecode {\n    bytecode_contents: Module state&lt;br&gt;Program functions&lt;br&gt;Debug information\n  }\n\n  state \"HAL module\" as HAL {\n    hal_contents: Devices&lt;br&gt;Executables&lt;br&gt;HAL functions&lt;br&gt;HAL types\n  }\n\n  state \"Custom module\" as custom {\n    custom_contents: External functions&lt;br&gt;Custom types\n  }</code></pre> </li> </ul>"},{"location":"reference/bindings/c-api/#hal","title":"HAL","text":"<p>IREE uses a Hardware Abstraction Layer (HAL) to model and interact with hardware devices like CPUs, GPUs and other accelerators.</p> <ul> <li>HAL drivers are used to enumerate and create HAL devices.</li> <li>HAL devices interface with hardware, such as by allocating device memory,   preparing executables, recording and dispatching command buffers, and   synchronizing with the host.</li> <li>HAL buffers represent data storage and buffer views represent views into   that storage with associated shapes and types (similar to \"tensors\").</li> 
</ul>"},{"location":"reference/bindings/c-api/#usage_1","title":"Usage","text":"<p>For other examples, see the samples below.</p> hello_world_terse.chello_world_explained.c <p>Source file: <code>runtime/src/iree/runtime/demo/hello_world_terse.c</code></p> runtime/src/iree/runtime/demo/hello_world_terse.c<pre><code>#include &lt;stdio.h&gt;\n\n#include \"iree/runtime/api.h\"\n#include \"iree/runtime/testdata/simple_mul_module_c.h\"\n\nstatic void iree_runtime_demo_run_session(iree_runtime_instance_t* instance);\nstatic void iree_runtime_demo_perform_mul(iree_runtime_session_t* session);\n\n//===----------------------------------------------------------------------===//\n// 1. Entry point / shared iree_runtime_instance_t setup\n//===----------------------------------------------------------------------===//\n\nint main(int argc, char** argv) {\n  // Create and configure the instance shared across all sessions.\n  iree_runtime_instance_options_t instance_options;\n  iree_runtime_instance_options_initialize(&amp;instance_options);\n  iree_runtime_instance_options_use_all_available_drivers(&amp;instance_options);\n  iree_runtime_instance_t* instance = NULL;\n  IREE_CHECK_OK(iree_runtime_instance_create(\n      &amp;instance_options, iree_allocator_system(), &amp;instance));\n\n  // All sessions should share the same instance.\n  iree_runtime_demo_run_session(instance);\n\n  iree_runtime_instance_release(instance);\n  return 0;\n}\n\n//===----------------------------------------------------------------------===//\n// 2. 
Load modules and initialize state in iree_runtime_session_t\n//===----------------------------------------------------------------------===//\n\nstatic void iree_runtime_demo_run_session(iree_runtime_instance_t* instance) {\n  // TODO(#5724): move device selection into the compiled modules.\n  iree_hal_device_t* device = NULL;\n  IREE_CHECK_OK(iree_runtime_instance_try_create_default_device(\n      instance, iree_make_cstring_view(\"local-task\"), &amp;device));\n\n  // Create one session per loaded module to hold the module state.\n  iree_runtime_session_options_t session_options;\n  iree_runtime_session_options_initialize(&amp;session_options);\n  iree_runtime_session_t* session = NULL;\n  IREE_CHECK_OK(iree_runtime_session_create_with_device(\n      instance, &amp;session_options, device,\n      iree_runtime_instance_host_allocator(instance), &amp;session));\n  iree_hal_device_release(device);\n\n  // Load your user module into the session (from memory, from file, etc).\n  const iree_file_toc_t* module_file =\n      iree_runtime_testdata_simple_mul_module_create();\n  IREE_CHECK_OK(iree_runtime_session_append_bytecode_module_from_memory(\n      session, iree_make_const_byte_span(module_file-&gt;data, module_file-&gt;size),\n      iree_allocator_null()));\n\n  // Run your functions; you should reuse the session to make multiple calls.\n  iree_runtime_demo_perform_mul(session);\n\n  iree_runtime_session_release(session);\n}\n\n//===----------------------------------------------------------------------===//\n// 3. 
Call a function within a module with buffer views\n//===----------------------------------------------------------------------===//\n\n// func.func @simple_mul(%arg0: tensor&lt;4xf32&gt;, %arg1: tensor&lt;4xf32&gt;) -&gt;\n// tensor&lt;4xf32&gt;\nstatic void iree_runtime_demo_perform_mul(iree_runtime_session_t* session) {\n  iree_runtime_call_t call;\n  IREE_CHECK_OK(iree_runtime_call_initialize_by_name(\n      session, iree_make_cstring_view(\"module.simple_mul\"), &amp;call));\n\n  // %arg0: tensor&lt;4xf32&gt;\n  iree_hal_buffer_view_t* arg0 = NULL;\n  static const iree_hal_dim_t arg0_shape[1] = {4};\n  static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};\n  IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(\n      iree_runtime_session_device(session),\n      iree_runtime_session_device_allocator(session),\n      IREE_ARRAYSIZE(arg0_shape), arg0_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n      IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n      (iree_hal_buffer_params_t){\n          .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n          .access = IREE_HAL_MEMORY_ACCESS_ALL,\n          .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n      },\n      iree_make_const_byte_span(arg0_data, sizeof(arg0_data)), &amp;arg0));\n  IREE_CHECK_OK(iree_hal_buffer_view_fprint(\n      stdout, arg0, /*max_element_count=*/4096,\n      iree_runtime_session_host_allocator(session)));\n  IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg0));\n  iree_hal_buffer_view_release(arg0);\n\n  fprintf(stdout, \"\\n * \\n\");\n\n  // %arg1: tensor&lt;4xf32&gt;\n  iree_hal_buffer_view_t* arg1 = NULL;\n  static const iree_hal_dim_t arg1_shape[1] = {4};\n  static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};\n  IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(\n      iree_runtime_session_device(session),\n      iree_runtime_session_device_allocator(session),\n      IREE_ARRAYSIZE(arg1_shape), arg1_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n      
IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n      (iree_hal_buffer_params_t){\n          .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n          .access = IREE_HAL_MEMORY_ACCESS_ALL,\n          .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n      },\n      iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &amp;arg1));\n  IREE_CHECK_OK(iree_hal_buffer_view_fprint(\n      stdout, arg1, /*max_element_count=*/4096,\n      iree_runtime_session_host_allocator(session)));\n  IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg1));\n  iree_hal_buffer_view_release(arg1);\n\n  IREE_CHECK_OK(iree_runtime_call_invoke(&amp;call, /*flags=*/0));\n\n  fprintf(stdout, \"\\n = \\n\");\n\n  // -&gt; tensor&lt;4xf32&gt;\n  iree_hal_buffer_view_t* ret0 = NULL;\n  IREE_CHECK_OK(iree_runtime_call_outputs_pop_front_buffer_view(&amp;call, &amp;ret0));\n  IREE_CHECK_OK(iree_hal_buffer_view_fprint(\n      stdout, ret0, /*max_element_count=*/4096,\n      iree_runtime_session_host_allocator(session)));\n  iree_hal_buffer_view_release(ret0);\n\n  iree_runtime_call_deinitialize(&amp;call);\n}\n</code></pre> <p>Source file: <code>runtime/src/iree/runtime/demo/hello_world_explained.c</code></p> runtime/src/iree/runtime/demo/hello_world_explained.c<pre><code>#include &lt;stdio.h&gt;\n\n#include \"iree/runtime/api.h\"\n\nstatic int iree_runtime_demo_main(void);\nstatic iree_status_t iree_runtime_demo_run_session(\n    iree_runtime_instance_t* instance);\nstatic iree_status_t iree_runtime_demo_perform_mul(\n    iree_runtime_session_t* session);\n\n#if defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_COMMAND_LINE_ARG)\n\nstatic const char* demo_file_path = NULL;\n\n// Takes the first argument on the command line as a file path and loads it.\nint main(int argc, char** argv) {\n  if (argc &lt; 2) {\n    fprintf(stderr, \"usage: session_demo module_file.vmfb\\n\");\n    return 1;\n  }\n  demo_file_path = argv[1];\n  return iree_runtime_demo_main();\n}\n\n// Loads a compiled IREE module from 
the file system.\nstatic iree_status_t iree_runtime_demo_load_module(\n    iree_runtime_session_t* session) {\n  return iree_runtime_session_append_bytecode_module_from_file(session,\n                                                               demo_file_path);\n}\n\n#elif defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_EMBEDDED_DATA)\n\n#include \"iree/runtime/testdata/simple_mul_module_c.h\"\n\nint main(int argc, char** argv) { return iree_runtime_demo_main(); }\n\n// Loads the bytecode module directly from memory.\n//\n// Embedding the compiled output into your binary is not always possible (or\n// recommended) but is a fairly painless way to get things working on a variety\n// of targets without worrying about how to deploy files or pass flags.\n//\n// In cases like this the module file is in .rodata and does not need to be\n// freed; if the memory needs to be released when the module is unloaded then a\n// custom allocator can be provided to get a callback instead.\nstatic iree_status_t iree_runtime_demo_load_module(\n    iree_runtime_session_t* session) {\n  const iree_file_toc_t* module_file =\n      iree_runtime_testdata_simple_mul_module_create();\n  return iree_runtime_session_append_bytecode_module_from_memory(\n      session, iree_make_const_byte_span(module_file-&gt;data, module_file-&gt;size),\n      iree_allocator_null());\n}\n\n#else\n#error \"must specify a way to load the module data\"\n#endif  // IREE_RUNTIME_DEMO_LOAD_FILE_FROM_*\n\n//===----------------------------------------------------------------------===//\n// 1. Entry point / shared iree_runtime_instance_t setup\n//===----------------------------------------------------------------------===//\n// Applications should create and share a single instance across all sessions.\n\n// This would live in your application startup/shutdown code or scoped to the\n// usage of IREE. 
Creating and destroying instances is expensive and should be\n// avoided.\nstatic int iree_runtime_demo_main(void) {\n  // Set up the shared runtime instance.\n  // An application should usually only have one of these and share it across\n  // all of the sessions it has. The instance is thread-safe, while the\n  // sessions are only thread-compatible (you need to lock if it's required).\n  iree_runtime_instance_options_t instance_options;\n  iree_runtime_instance_options_initialize(&amp;instance_options);\n  iree_runtime_instance_options_use_all_available_drivers(&amp;instance_options);\n  iree_runtime_instance_t* instance = NULL;\n  iree_status_t status = iree_runtime_instance_create(\n      &amp;instance_options, iree_allocator_system(), &amp;instance);\n\n  // Run the demo.\n  // A real application would load its models (at startup, on-demand, etc) and\n  // retain them somewhere to be reused. Startup time and likelihood of failure\n  // varies across different HAL backends; the synchronous CPU backend is nearly\n  // instantaneous and will never fail (unless out of memory) while the Vulkan\n  // backend may take significantly longer and fail if there are no supported\n  // devices.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_demo_run_session(instance);\n  }\n\n  // Release the shared instance - it will be deallocated when all sessions\n  // using it have been released (here it is deallocated immediately).\n  iree_runtime_instance_release(instance);\n\n  int ret = (int)iree_status_code(status);\n  if (!iree_status_is_ok(status)) {\n    // Dump nice status messages to stderr on failure.\n    // An application can route these through its own logging infrastructure as\n    // needed. Note that the status is a handle and must be freed!\n    iree_status_fprint(stderr, status);\n    iree_status_ignore(status);\n  }\n  return ret;\n}\n\n//===----------------------------------------------------------------------===//\n// 2. 
Load modules and initialize state in iree_runtime_session_t\n//===----------------------------------------------------------------------===//\n// Each instantiation of a module will live in its own session. Module state\n// like variables will be retained across calls within the same session.\n\n// Loads the demo module and uses it to perform some math.\n// In a real application you'd want to hang on to the iree_runtime_session_t\n// and reuse it for future calls - especially if it holds state internally.\nstatic iree_status_t iree_runtime_demo_run_session(\n    iree_runtime_instance_t* instance) {\n  // TODO(#5724): move device selection into the compiled modules.\n  iree_hal_device_t* device = NULL;\n  IREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(\n      instance, iree_make_cstring_view(\"local-task\"), &amp;device));\n\n  // Set up the session to run the demo module.\n  // Sessions are like OS processes and are used to isolate modules from each\n  // other and hold runtime state such as the variables used within the module.\n  // The same module loaded into two sessions will see their own private state.\n  iree_runtime_session_options_t session_options;\n  iree_runtime_session_options_initialize(&amp;session_options);\n  iree_runtime_session_t* session = NULL;\n  iree_status_t status = iree_runtime_session_create_with_device(\n      instance, &amp;session_options, device,\n      iree_runtime_instance_host_allocator(instance), &amp;session);\n  iree_hal_device_release(device);\n\n  // Load the compiled user module in a demo-specific way.\n  // Applications could specify files, embed the outputs directly in their\n  // binaries, fetch them over the network, etc.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_demo_load_module(session);\n  }\n\n  // Build and issue the call.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_demo_perform_mul(session);\n  }\n\n  // Release the session and free all resources.\n  
iree_runtime_session_release(session);\n  return status;\n}\n\n//===----------------------------------------------------------------------===//\n// 3. Call a function within a module with buffer views\n//===----------------------------------------------------------------------===//\n// The inputs and outputs of a call are reusable across calls (and possibly\n// across sessions depending on device compatibility) and can be setup by the\n// application as needed. For example, an application could perform\n// multi-threaded buffer view creation and then issue the call from a single\n// thread when all inputs are ready. This simple demo just allocates them\n// per-call and throws them away.\n\n// Sets up and calls the simple_mul function and dumps the results:\n// func.func @simple_mul(%arg0: tensor&lt;4xf32&gt;, %arg1: tensor&lt;4xf32&gt;) -&gt;\n// tensor&lt;4xf32&gt;\n//\n// NOTE: this is a demo and as such this performs no memoization; a real\n// application could reuse a lot of these structures and cache lookups of\n// iree_vm_function_t to reduce the amount of per-call overhead.\nstatic iree_status_t iree_runtime_demo_perform_mul(\n    iree_runtime_session_t* session) {\n  // Initialize the call to the function.\n  iree_runtime_call_t call;\n  IREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(\n      session, iree_make_cstring_view(\"module.simple_mul\"), &amp;call));\n\n  // Append the function inputs with the HAL device allocator in use by the\n  // session. 
The buffers will be usable within the session and _may_ be usable\n  // in other sessions depending on whether they share a compatible device.\n  iree_hal_device_t* device = iree_runtime_session_device(session);\n  iree_hal_allocator_t* device_allocator =\n      iree_runtime_session_device_allocator(session);\n  iree_allocator_t host_allocator =\n      iree_runtime_session_host_allocator(session);\n  iree_status_t status = iree_ok_status();\n  {\n    // %arg0: tensor&lt;4xf32&gt;\n    iree_hal_buffer_view_t* arg0 = NULL;\n    if (iree_status_is_ok(status)) {\n      static const iree_hal_dim_t arg0_shape[1] = {4};\n      static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};\n      status = iree_hal_buffer_view_allocate_buffer_copy(\n          device, device_allocator,\n          // Shape rank and dimensions:\n          IREE_ARRAYSIZE(arg0_shape), arg0_shape,\n          // Element type:\n          IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n          // Encoding type:\n          IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n          (iree_hal_buffer_params_t){\n              // Where to allocate (host or device):\n              .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n              // Access to allow to this memory:\n              .access = IREE_HAL_MEMORY_ACCESS_ALL,\n              // Intended usage of the buffer (transfers, dispatches, etc):\n              .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n          },\n          // The actual heap buffer to wrap or clone and its allocator:\n          iree_make_const_byte_span(arg0_data, sizeof(arg0_data)),\n          // Buffer view + storage are returned and owned by the caller:\n          &amp;arg0);\n    }\n    if (iree_status_is_ok(status)) {\n      IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(\n          stdout, arg0, /*max_element_count=*/4096, host_allocator));\n      // Add to the call inputs list (which retains the buffer view).\n      status = iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg0);\n    }\n    
// Since the call retains the buffer view we can release it here.\n    iree_hal_buffer_view_release(arg0);\n\n    fprintf(stdout, \"\\n * \\n\");\n\n    // %arg1: tensor&lt;4xf32&gt;\n    iree_hal_buffer_view_t* arg1 = NULL;\n    if (iree_status_is_ok(status)) {\n      static const iree_hal_dim_t arg1_shape[1] = {4};\n      static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};\n      status = iree_hal_buffer_view_allocate_buffer_copy(\n          device, device_allocator, IREE_ARRAYSIZE(arg1_shape), arg1_shape,\n          IREE_HAL_ELEMENT_TYPE_FLOAT_32,\n          IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n          (iree_hal_buffer_params_t){\n              .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n              .access = IREE_HAL_MEMORY_ACCESS_ALL,\n              .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n          },\n          iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &amp;arg1);\n    }\n    if (iree_status_is_ok(status)) {\n      IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(\n          stdout, arg1, /*max_element_count=*/4096, host_allocator));\n      status = iree_runtime_call_inputs_push_back_buffer_view(&amp;call, arg1);\n    }\n    iree_hal_buffer_view_release(arg1);\n  }\n\n  // Synchronously perform the call.\n  if (iree_status_is_ok(status)) {\n    status = iree_runtime_call_invoke(&amp;call, /*flags=*/0);\n  }\n\n  fprintf(stdout, \"\\n = \\n\");\n\n  // Dump the function outputs.\n  iree_hal_buffer_view_t* ret0 = NULL;\n  if (iree_status_is_ok(status)) {\n    // Try to get the first call result as a buffer view.\n    status = iree_runtime_call_outputs_pop_front_buffer_view(&amp;call, &amp;ret0);\n  }\n  if (iree_status_is_ok(status)) {\n    // This prints the buffer view out but an application could read its\n    // contents, pass it to another call, etc.\n    status = iree_hal_buffer_view_fprint(\n        stdout, ret0, /*max_element_count=*/4096, host_allocator);\n  }\n  iree_hal_buffer_view_release(ret0);\n\n  
iree_runtime_call_deinitialize(&amp;call);\n  return status;\n}\n</code></pre>"},{"location":"reference/bindings/c-api/#samples_1","title":"Samples","text":"Project Source Description iree-org/iree-template-runtime-cmake <code>hello_world.c</code> Runtime application template iree-org/iree <code>runtime/demo/</code> In-tree demos of the high level runtime API iree-org/iree <code>samples/</code> In-tree sample applications iree-org/iree-experimental <code>runtime-library/</code> Shared runtime library builderBuilds <code>libireert.so</code> to aid development iml130/iree-template-cpp <code>simple_embedding.c</code> Demo integration into a project"},{"location":"reference/bindings/c-api/#compiler-runtime-jit","title":"Compiler + Runtime = JIT","text":"<p>The compiler and runtime APIs may be used together to build a \"just in time\" (JIT) execution engine. JIT compilation allows for last-minute specialization with no prior knowledge of target devices and avoids issues with version drift, but it can also constrain deployment options and usage scenarios.</p>"},{"location":"reference/bindings/python/","title":"Python bindings","text":"","tags":["Python"]},{"location":"reference/bindings/python/#overview","title":"Overview","text":"<p>IREE offers Python bindings split into several packages, covering different components:</p> PIP package name Description <code>iree-compiler</code> IREE's generic compiler tools and helpers <code>iree-runtime</code> IREE's runtime, including CPU and GPU backends <code>iree-tools-tf</code> Tools for importing from TensorFlow <code>iree-tools-tflite</code> Tools for importing from TensorFlow Lite <code>iree-jax</code> Tools for importing from JAX <p>Collectively, these packages allow for importing from frontends, compiling towards various targets, and executing compiled code on IREE's backends.</p>","tags":["Python"]},{"location":"reference/bindings/python/#prerequisites","title":"Prerequisites","text":"<p>To use IREE's Python bindings, you 
will first need to install Python 3 and pip, as needed.</p> Tip - Virtual environments <p>We recommend using virtual environments to manage python packages, such as through <code>venv</code> (about, tutorial):</p>  Linux macOS Windows <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\nsource .venv/bin/activate\n</code></pre> <pre><code>python -m venv .venv\n.venv\\Scripts\\activate.bat\n</code></pre> <p>When done, run <code>deactivate</code>.</p>","tags":["Python"]},{"location":"reference/bindings/python/#installing-iree-packages","title":"Installing IREE packages","text":"","tags":["Python"]},{"location":"reference/bindings/python/#prebuilt-packages","title":"Prebuilt packages","text":"Stable releases Nightly releases <p>Stable release packages are published to PyPI.</p> <pre><code>python -m pip install \\\n  iree-compiler \\\n  iree-runtime\n</code></pre> <p>Nightly releases are published on GitHub releases.</p> <pre><code>python -m pip install \\\n  --find-links https://iree.dev/pip-release-links.html \\\n  --upgrade \\\n  iree-compiler \\\n  iree-runtime\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#building-from-source","title":"Building from source","text":"<p>See Building Python bindings page for instructions for building from source.</p>","tags":["Python"]},{"location":"reference/bindings/python/#usage","title":"Usage","text":"<p>Info - API reference pages</p> <p>API reference pages for IREE's runtime and compiler Python APIs are hosted on readthedocs.</p> <p>Documentation for the MLIR compiler Python APIs can be found at https://mlir.llvm.org/docs/Bindings/Python/.</p>","tags":["Python"]},{"location":"reference/bindings/python/#compile-a-program","title":"Compile a program","text":"<pre><code>from iree import compiler as ireec\n\n# Compile a module.\nINPUT_MLIR = \"\"\"\nmodule @arithmetic {\n  func.func @simple_mul(%arg0: tensor&lt;4xf32&gt;, %arg1: tensor&lt;4xf32&gt;) 
-&gt; tensor&lt;4xf32&gt; {\n    %0 = arith.mulf %arg0, %arg1 : tensor&lt;4xf32&gt;\n    return %0 : tensor&lt;4xf32&gt;\n  }\n}\n\"\"\"\n\n# Compile using the vmvx (reference) target:\ncompiled_flatbuffer = ireec.tools.compile_str(\n    INPUT_MLIR,\n    target_backends=[\"vmvx\"])\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#run-a-compiled-program","title":"Run a compiled program","text":"<pre><code>from iree import runtime as ireert\nimport numpy as np\n\n# Register the module with a runtime context.\n# Use the \"local-task\" CPU driver, which can load the vmvx executable:\nconfig = ireert.Config(\"local-task\")\nctx = ireert.SystemContext(config=config)\nvm_module = ireert.VmModule.copy_buffer(ctx.instance, compiled_flatbuffer)\nctx.add_vm_module(vm_module)\n\n# Invoke the function and print the result.\nprint(\"INVOKE simple_mul\")\narg0 = np.array([1., 2., 3., 4.], dtype=np.float32)\narg1 = np.array([4., 5., 6., 7.], dtype=np.float32)\nf = ctx.modules.arithmetic[\"simple_mul\"]\nresults = f(arg0, arg1).to_host()\nprint(\"Results:\", results)\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#samples","title":"Samples","text":"<p>Check out the samples in IREE's samples/colab/ directory and the iree-experimental repository for examples using the Python APIs.</p>","tags":["Python"]},{"location":"reference/bindings/python/#console-scripts","title":"Console scripts","text":"<p>The Python packages include console scripts for most of IREE's native tools like <code>iree-compile</code> and <code>iree-run-module</code>.  
After installing a package from pip, these should be added to your path automatically:</p> <pre><code>$ python -m pip install iree-runtime\n$ which iree-run-module\n\n/projects/.venv/Scripts/iree-run-module\n</code></pre>","tags":["Python"]},{"location":"reference/bindings/python/#profiling","title":"Profiling","text":"<p>The tools in the <code>iree-runtime</code> package support variants:</p> Variant name Description default Standard runtime tools tracy Runtime tools instrumented using the Tracy profiler <p>Switch between variants of the installed tools using the <code>IREE_PY_RUNTIME</code> environment variable:</p> <pre><code>IREE_PY_RUNTIME=tracy iree-run-module ...\n</code></pre> <p>See the developer documentation page on Profiling with Tracy for information on using Tracy.</p> <p>Tip - flushing profile data</p> <p>When writing a Python-based program that you want to profile you may need to insert IREE runtime calls to periodically flush the profile data:</p> <pre><code>device = ... # HalDevice\ndevice.flush_profiling()\n</code></pre>","tags":["Python"]},{"location":"reference/mlir-dialects/","title":"MLIR dialects","text":"<p>These pages contain automatically generated documentation for the MLIR dialects defined in the IREE repository. IREE also makes extensive use of dialects from the upstream MLIR repository, which are documented at https://mlir.llvm.org/docs/Dialects/.</p>"},{"location":"reference/mlir-dialects/#iree-internal-dialects","title":"IREE internal dialects","text":"<p>These dialects are an implementation detail of the IREE compiler, though they can be used by plugins and other advanced integrations. 
The sources for most of these dialects can be found in the <code>iree/compiler/Dialect/</code> directory.</p> Dialect Description Check Defines assertions for IREE tests Flow Models execution data flow and partitioning HAL Represents operations against the IREE HAL<sup>1</sup> HAL/Inline Inline HAL interop runtime module dialect HAL/Loader HAL inline executable loader runtime module dialect IO/Parameters External parameter resource management APIs LinalgExt Extensions to the Linalg dialect for specific operations Stream Model execution partitioning and scheduling Util Types and ops common across IREE subdialects VM Represents operations against an abstract virtual machine VMVX Virtual Machine Vector Extensions"},{"location":"reference/mlir-dialects/#iree-public-dialects","title":"IREE public dialects","text":"<p>The ops in these dialects are legal to include in compiler inputs. The sources for these dialects can be found in the <code>llvm-external-projects/iree-dialects/</code> directory that is designed to be used from other projects via LLVM's external projects mechanism.</p> Dialect Description IREEInput Structural ops legal as input to IREE's compiler IREEVectorExt Extensions to the Vector dialect for specific operations <ol> <li> <p>Hardware Abstraction Layer\u00a0\u21a9</p> </li> </ol>"},{"location":"reference/mlir-dialects/Check/","title":"Check","text":""},{"location":"reference/mlir-dialects/Check/#check-dialect","title":"'check' Dialect","text":"<p>A dialect implementing test assertions for IREE modules.</p> <ul> <li>'check' Dialect<ul> <li>Operations<ul> <li>check.expect_all_true (Check::ExpectAllTrueOp)</li> <li>check.expect_almost_eq (Check::ExpectAlmostEqOp)</li> <li>check.expect_almost_eq_const (Check::ExpectAlmostEqConstOp)</li> <li>check.expect_eq (Check::ExpectEqOp)</li> <li>check.expect_eq_const (Check::ExpectEqConstOp)</li> <li>check.expect_false (Check::ExpectFalseOp)</li> <li>check.expect_true (Check::ExpectTrueOp)</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/Check/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Check/#checkexpect_all_true-checkexpectalltrueop","title":"<code>check.expect_all_true</code> (Check::ExpectAllTrueOp)","text":"<p>Checks that the operand contains only values that are true</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_all_true` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $operand `)` attr-dict `:` type($operand)\n</code></pre> <p>Verifies that the operand contains true values, which are represented by any non-zero integer.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_all_true&lt;%device&gt;(%arg0) : !hal.buffer_view\ncheck.expect_all_true(%arg1) : tensor&lt;2x2xi32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands","title":"Operands:","text":"Operand Description <code>device</code> device <code>operand</code> buffer_view or tensor of signless integer values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq-checkexpectalmosteqop","title":"<code>check.expect_almost_eq</code> (Check::ExpectAlmostEqOp)","text":"<p>Checks that the operands are almost equal</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_almost_eq` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the buffer view or tensor operands with float elements are almost equal to within an implementation-defined \"reasonable\" tolerance.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_almost_eq(%arg0, %arg1) : tensor&lt;5xf32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_1","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> buffer_view or tensor of floating-point values <code>rhs</code> buffer_view or tensor of floating-point 
values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq_const-checkexpectalmosteqconstop","title":"<code>check.expect_almost_eq_const</code> (Check::ExpectAlmostEqConstOp)","text":"<p>Checks that the tensor operand is almost equal to some constant</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_almost_eq_const` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the tensor operand with float elements is almost equal to the constant attribute within an implementation-defined \"reasonable\" tolerance.</p> <p>Issues a non-fatal failure if the verification fails.</p> <p>This op is just a convenience wrapper around the expect_almost_eq op.</p> <pre><code>check.expect_almost_eq_const(%const0, dense&lt;[0.999999, 2.0]&gt; : tensor&lt;5xf32&gt;) : tensor&lt;5xf32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_2","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> tensor of floating-point values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq-checkexpecteqop","title":"<code>check.expect_eq</code> (Check::ExpectEqOp)","text":"<p>Checks that the tensor or buffer view operands are equal</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_eq` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the operands are exactly equal.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_eq(%arg0, %arg1) : tensor&lt;5xi32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_3","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> buffer_view or 
tensor of any type values <code>rhs</code> buffer_view or tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq_const-checkexpecteqconstop","title":"<code>check.expect_eq_const</code> (Check::ExpectEqConstOp)","text":"<p>Checks that the tensor operand is equal to some constant</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_eq_const` (`` `&lt;` $device^ `&gt;`)?\n              `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n</code></pre> <p>Verifies that the tensor operand is exactly equal to a constant attribute.</p> <p>Issues a non-fatal failure if the verification fails.</p> <p>This op is just a convenience wrapper around the expect_eq op.</p> <pre><code>check.expect_eq_const(%arg0, dense&lt;[1, 2]&gt; : tensor&lt;2xi32&gt;) : tensor&lt;2xi32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_4","title":"Operands:","text":"Operand Description <code>device</code> device <code>lhs</code> tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_false-checkexpectfalseop","title":"<code>check.expect_false</code> (Check::ExpectFalseOp)","text":"<p>Checks that the operand is false</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_false` `(` $operand `)` attr-dict `:` type($operand)\n</code></pre> <p>Verifies that the operand contains a false value, which is represented by zero.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_false(%arg0) : i32\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_5","title":"Operands:","text":"Operand Description <code>operand</code> signless integer"},{"location":"reference/mlir-dialects/Check/#checkexpect_true-checkexpecttrueop","title":"<code>check.expect_true</code> 
(Check::ExpectTrueOp)","text":"<p>Checks that the operand is true</p> <p>Syntax:</p> <pre><code>operation ::= `check.expect_true` `(` $operand `)` attr-dict `:` type($operand)\n</code></pre> <p>Verifies that the operand contains a true value, which is represented by any non-zero integer.</p> <p>Issues a non-fatal failure if the verification fails.</p> <pre><code>check.expect_true(%arg0) : i32\n</code></pre>"},{"location":"reference/mlir-dialects/Check/#operands_6","title":"Operands:","text":"Operand Description <code>operand</code> signless integer"},{"location":"reference/mlir-dialects/Flow/","title":"Flow","text":""},{"location":"reference/mlir-dialects/Flow/#flow-dialect","title":"'flow' Dialect","text":"<p>A dialect designed to model execution data flow and partitioning.</p> <p>The flow dialect is used to model regions of dense computation and the data flow between them. MLIR value-semantic tensors are used as the primary data type to allow SSA use-def to provide the bulk of the infrastructure required to perform the computation partitioning and outlining.</p> <p>The dialect is designed to ingest relatively high-level linear algebra via XLA HLO ops (that also operate on the value-semantic tensor types) and optionally MLIR standard ops for control flow and other actions. After conversion of any higher-level ops that have special semantics in the flow dialect, such as global variables, the rest are partitioned into regions containing simple and compatible computations. Finally, outlining moves the computations into executables and leaves only the execution flow encoded via dispatch operations.</p> <p>The primary unit of interest is a \"dispatch region\" containing compatible computations that can be scheduled together efficiently (and safely). \"Compatible\" here is specified as similarly shaped workloads that indicate how many invocations a computation can be parallelized across when running in an SPMD execution model. 
Though it depends on the particular runtime backends this more concretely means things like the untiled workload (or tiled workgroups) used in GPU dispatches or similar thread pool executors.</p> <p>After identification of the dispatchable regions a set of transformations performs folding and simplification to reduce the total number of dispatches. Heuristics are used in certain cases to more efficiently schedule special ops (such as GEMM) and the design is amenable to profile- guided analysis that can be added in the future.</p> <p>The resulting outlined executable modules containing the dispatchable code can be translated to one or more backends (such as SPIR-V for Vulkan, or LLVM IR for running on the CPU, etc). The IR that is outlined is untouched and in the input format (such as XLA HLO ops) allowing conversion using any MLIR target that supports ingesting such input. A few special ops are used to communicate statically available information such as the expected workload size, shapes of inputs and outputs, etc.</p> <ul> <li>'flow' Dialect<ul> <li>Operations<ul> <li>Collective communication ops<ul> <li>flow.channel.count (Flow::ChannelCountOp)</li> <li>flow.channel.default (Flow::ChannelDefaultOp)</li> <li>flow.channel.rank (Flow::ChannelRankOp)</li> <li>flow.channel.split (Flow::ChannelSplitOp)</li> <li>flow.collective.all_gather (Flow::CollectiveAllGatherOp)</li> <li>flow.collective.all_reduce (Flow::CollectiveAllReduceOp)</li> <li>flow.collective.all_to_all (Flow::CollectiveAllToAllOp)</li> <li>flow.collective.reduce_scatter (Flow::CollectiveReduceScatterOp)</li> <li>flow.collective.send_recv (Flow::CollectiveSendRecvOp)</li> </ul> </li> <li>Dispatch ops<ul> <li>flow.dispatch (Flow::DispatchOp)</li> </ul> </li> <li>Executable ops<ul> <li>flow.executable_end (Flow::ExecutableEndOp)</li> <li>flow.executable.export (Flow::ExecutableExportOp)</li> <li>flow.executable (Flow::ExecutableOp)</li> </ul> </li> <li>Partitioned region ops<ul> <li>flow.dispatch.region 
(Flow::DispatchRegionOp)</li> <li>flow.dispatch.tensor.load (Flow::DispatchTensorLoadOp)</li> <li>flow.dispatch.tensor.store (Flow::DispatchTensorStoreOp)</li> <li>flow.dispatch.tie_shape (Flow::DispatchTieShapeOp)</li> <li>flow.dispatch.workgroup.count (Flow::DispatchWorkgroupCountOp)</li> <li>flow.dispatch.workgroup.id (Flow::DispatchWorkgroupIDOp)</li> <li>flow.dispatch.workgroup.size (Flow::DispatchWorkgroupSizeOp)</li> <li>flow.dispatch.workgroups (Flow::DispatchWorkgroupsOp)</li> <li>flow.return (Flow::ReturnOp)</li> </ul> </li> <li>Streamable call ops<ul> <li>flow.call (Flow::CallOp)</li> <li>flow.func (Flow::FuncOp)</li> </ul> </li> <li>Tensor ops<ul> <li>flow.dispatch.workgroup_count_from_dag_root (Flow::DispatchWorkgroupCountFromDagRootOp)</li> <li>flow.dispatch.workgroup_count_from_slice (Flow::DispatchWorkgroupCountFromSliceOp)</li> <li>flow.dispatch.workload.ordinal (Flow::DispatchWorkloadOrdinalOp)</li> <li>flow.tensor.alloca (Flow::TensorAllocaOp)</li> <li>flow.tensor.bitcast (Flow::TensorBitCastOp)</li> <li>flow.tensor.clone (Flow::TensorCloneOp)</li> <li>flow.tensor.constant (Flow::TensorConstantOp)</li> <li>flow.tensor.empty (Flow::TensorEmptyOp)</li> <li>flow.tensor.load (Flow::TensorLoadOp)</li> <li>flow.tensor.reshape (Flow::TensorReshapeOp)</li> <li>flow.tensor.slice (Flow::TensorSliceOp)</li> <li>flow.tensor.splat (Flow::TensorSplatOp)</li> <li>flow.tensor.store (Flow::TensorStoreOp)</li> <li>flow.tensor.tie_shape (Flow::TensorTieShapeOp)</li> <li>flow.tensor.trace (Flow::TensorTraceOp)</li> <li>flow.tensor.update (Flow::TensorUpdateOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>DummyAttr</li> </ul> </li> <li>Type constraints<ul> <li>dispatch.tensor</li> <li>dispatch.tensor</li> <li>dispatch.tensor</li> </ul> </li> <li>Types<ul> <li>ChannelType</li> <li>DummyType</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/Flow/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Flow/#collective-communication-ops","title":"Collective communication ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowchannelcount-flowchannelcountop","title":"<code>flow.channel.count</code> (Flow::ChannelCountOp)","text":"<p>Returns the total number of participants in the group</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.count` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the total participant count in the collective communicator group.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowchanneldefault-flowchanneldefaultop","title":"<code>flow.channel.default</code> (Flow::ChannelDefaultOp)","text":"<p>Returns a default collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.default` ($group^)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a channel initialized using the runtime environment.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>group</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#results_1","title":"Results:","text":"Result Description <code>result</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#flowchannelrank-flowchannelrankop","title":"<code>flow.channel.rank</code> (Flow::ChannelRankOp)","text":"<p>Returns the rank of the local participant in the group</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.rank` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank the channel represents as a participant in a collective group in <code>[0, count)</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_1","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowchannelsplit-flowchannelsplitop","title":"<code>flow.channel.split</code> (Flow::ChannelSplitOp)","text":"<p>Splits a collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `flow.channel.split` $channel `,` $color `,` $key\n              `:` type($channel) `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. 
Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_2","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel <code>color</code> index <code>key</code> index"},{"location":"reference/mlir-dialects/Flow/#results_3","title":"Results:","text":"Result Description <code>result</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_gather-flowcollectiveallgatherop","title":"<code>flow.collective.all_gather</code> (Flow::CollectiveAllGatherOp)","text":"<p>Performs all-gather operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.all_gather` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>It gathers data from all ranks and concatenates them on the 0-th dimension. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_3","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_4","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_reduce-flowcollectiveallreduceop","title":"<code>flow.collective.all_reduce</code> (Flow::CollectiveAllReduceOp)","text":"<p>Performs all-reduce operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.all_reduce` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>The operation reduces data across all the ranks in the channel. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>reduction_op</code>mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_4","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_5","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_to_all-flowcollectivealltoallop","title":"<code>flow.collective.all_to_all</code> (Flow::CollectiveAllToAllOp)","text":"<p>Performs all-to-all operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.all_to_all` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>This operation mutually exchanges data across all of the ranks in the channel. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_5","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_6","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivereduce_scatter-flowcollectivereducescatterop","title":"<code>flow.collective.reduce_scatter</code> (Flow::CollectiveReduceScatterOp)","text":"<p>Performs reduce and scatter operations</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.reduce_scatter` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n              `(` type($target) `,` type($source) `,` type($channel) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>The operation reduces data across all the ranks in the channel and     scatters the result to each rank. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>reduction_op</code>mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_6","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_7","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivesend_recv-flowcollectivesendrecvop","title":"<code>flow.collective.send_recv</code> (Flow::CollectiveSendRecvOp)","text":"<p>Performs a grouped send and receive operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.collective.send_recv` $element_type `,` $target `,` $source `,` $channel `,` $send `,` $recv `:`\n              `(` type($target) `,` type($source) `,` type($channel) `,` type($send) `,` type($recv) `)` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims, $tied_operands)\n              attr-dict-with-keyword\n</code></pre> <p>The operation sends data to the rank specified by send     and receives data from the rank specified by recv. If send is -1, this rank     will not send any data. If recv is -1, this rank will not receive any data     and the output will be all zeros. 
Interfaces: <code>InferTypeOpInterface</code>, <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_7","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>source</code> ranked tensor of any type values <code>channel</code> a collective communication channel <code>send</code> index <code>recv</code> index"},{"location":"reference/mlir-dialects/Flow/#results_8","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatch-flowdispatchop","title":"<code>flow.dispatch</code> (Flow::DispatchOp)","text":"<p>A dispatch of workgroups across a grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch` custom&lt;DispatchEntryPoints&gt;($entry_points)\n              (`[` $workload^ `]`)? ``\n              `(` $arguments `)` attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n</code></pre> <p>Dispatches workgroups across a grid defined by the captured workload parameters carrying the information required to compute the workgroup count at runtime. 
The function for converting the workload into a 3D workgroup count is attached to the dispatch entry point and may contain arbitrary host logic.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_points</code>::mlir::ArrayAttrsymbol ref array attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_8","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_9","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#executable-ops","title":"Executable ops","text":"<p>Executables for outlined regions.</p>"},{"location":"reference/mlir-dialects/Flow/#flowexecutable_end-flowexecutableendop","title":"<code>flow.executable_end</code> (Flow::ExecutableEndOp)","text":"<p>Terminator pseudo-op for the executable op</p> <p>Syntax:</p> <pre><code>operation ::= `flow.executable_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Flow::ExecutableOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/Flow/#flowexecutableexport-flowexecutableexportop","title":"<code>flow.executable.export</code> (Flow::ExecutableExportOp)","text":"<p>Defines an executable entry point for dispatch operations</p> <p>Syntax:</p> <pre><code>operation ::= 
`flow.executable.export` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              custom&lt;SymbolAlias&gt;($sym_name, $function_ref)\n              custom&lt;WorkgroupCountRegion&gt;($workgroup_count)\n              attr-dict-with-keyword\n</code></pre> <p>Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal function.</p> <p>Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.</p> <p>Traits: <code>HasParent&lt;IREE::Flow::ExecutableOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_ref</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Flow/#flowexecutable-flowexecutableop","title":"<code>flow.executable</code> (Flow::ExecutableOp)","text":"<p>Generic executable module</p> <p>Syntax:</p> <pre><code>operation ::= `flow.executable` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>An executable module containing one or more public functions. 
The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Flow::ExecutableEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code>, <code>Util_ObjectLike</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#partitioned-region-ops","title":"Partitioned region ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchregion-flowdispatchregionop","title":"<code>flow.dispatch.region</code> (Flow::DispatchRegionOp)","text":"<p>A group of ops</p> <p>This op is a container/grouping of ops. It represents a fusion group before being lowered to a dispatch region. Ops are collected inside of the region body of the op. Values from parent regions can be captured. Results are yielded with a <code>return</code> terminator and returned from this op.</p> <p><code>dispatch.region</code> ops are lowered to <code>dispatch.workgroups</code> ops. Workgroups isolated from above. <code>dispatch.region</code> ops are a more lightweight abstraction for implementing fusion heuristics, i.e., the process of deciding which ops should form a dispatch region.</p> <p>This op also has a second region: <code>workload_count</code>. The arguments to the region represent the workload for the dispatch, and returns the number of workgroups for the dispatch. 
The region is lowered directly to <code>workload_count</code> region of <code>dispatch.workgroups</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_9","title":"Operands:","text":"Operand Description <code>result_dims</code> variadic of index <code>workload</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_10","title":"Results:","text":"Result Description <code>result</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorload-flowdispatchtensorloadop","title":"<code>flow.dispatch.tensor.load</code> (Flow::DispatchTensorLoadOp)","text":"<p>Loads a tensor from a dispatch input placeholder</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.tensor.load` $source\n              `,` `offsets` `=` custom&lt;DynamicIndexList&gt;(\n              $offsets, $static_offsets)\n              `,` `sizes` `=` custom&lt;DynamicIndexList&gt;(\n              $sizes, $static_sizes)\n              `,` `strides` `=` custom&lt;DynamicIndexList&gt;(\n              $strides, $static_strides)\n              attr-dict `:` type($source) (`{` $source_dims^ `}`)?  `-&gt;` type($result)\n</code></pre> <p>Loads an input tensor or subtensor from an input placeholder. 
As each workgroup executes concurrently all workgroups will receive identical loaded results of regions that may overlap.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OffsetSizeAndStrideOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>static_offsets</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_sizes</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_strides</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_10","title":"Operands:","text":"Operand Description <code>source</code> dispatch.tensor <code>source_dims</code> variadic of index <code>offsets</code> variadic of index <code>sizes</code> variadic of index <code>strides</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_11","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorstore-flowdispatchtensorstoreop","title":"<code>flow.dispatch.tensor.store</code> (Flow::DispatchTensorStoreOp)","text":"<p>Stores a tensor into a dispatch output placeholder</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.tensor.store` $value `,` $target\n              `,` `offsets` `=` custom&lt;DynamicIndexList&gt;(\n              $offsets, $static_offsets)\n              `,` `sizes` `=` custom&lt;DynamicIndexList&gt;(\n              $sizes, $static_sizes)\n              `,` `strides` `=` custom&lt;DynamicIndexList&gt;(\n              $strides, $static_strides)\n      
        attr-dict `:` type($value) `-&gt;` type($target) (`{` $target_dims^ `}`)?\n</code></pre> <p>Stores a tensor or subtensor into an output tensor placeholder. As each workgroup executes concurrently behavior is undefined if more than one workgroup stores into overlapping regions of the full output tensor.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>OffsetSizeAndStrideOpInterface</code>, <code>Util_ShapeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>static_offsets</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_sizes</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_strides</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_11","title":"Operands:","text":"Operand Description <code>value</code> ranked tensor of any type values <code>target</code> dispatch.tensor <code>target_dims</code> variadic of index <code>offsets</code> variadic of index <code>sizes</code> variadic of index <code>strides</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtie_shape-flowdispatchtieshapeop","title":"<code>flow.dispatch.tie_shape</code> (Flow::DispatchTieShapeOp)","text":"<p>Ties a runtime shape to a dispatch I/O argument</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.tie_shape` $operand attr-dict\n              `:` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Metadata op used to tie a runtime-computed shape with dynamic dimensions to a dispatch input/output argument. 
All uses of the argument should use the pass-through result of this op to allow for SSA-based shape resolution.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_12","title":"Operands:","text":"Operand Description <code>operand</code> dispatch.tensor <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_12","title":"Results:","text":"Result Description <code>result</code> dispatch.tensor"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupcount-flowdispatchworkgroupcountop","title":"<code>flow.dispatch.workgroup.count</code> (Flow::DispatchWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable.</p> <pre><code>%x = flow.dispatch.workgroup.count[0] : index\n%y = flow.dispatch.workgroup.count[1] : index\n%z = flow.dispatch.workgroup.count[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_13","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupid-flowdispatchworkgroupidop","title":"<code>flow.dispatch.workgroup.id</code> (Flow::DispatchWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current workgroup in the range of <code>[0, flow.dispatch.workgroup.count)</code> along each dimension.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable.</p> <pre><code>%x = flow.dispatch.workgroup.id[0] : index\n%y = flow.dispatch.workgroup.id[1] : index\n%z = flow.dispatch.workgroup.id[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_14","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupsize-flowdispatchworkgroupsizeop","title":"<code>flow.dispatch.workgroup.size</code> (Flow::DispatchWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Workgroup sizes are not determined at the flow dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable.</p> <pre><code>%x = flow.dispatch.workgroup.size[0] : index\n%y = flow.dispatch.workgroup.size[1] : index\n%z = flow.dispatch.workgroup.size[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_15","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroups-flowdispatchworkgroupsop","title":"<code>flow.dispatch.workgroups</code> (Flow::DispatchWorkgroupsOp)","text":"<p>A dispatch of workgroups across a 3-dimensional grid</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroups` (`[` $workload^ `]`)? ``\n              `(` $arguments `)` `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n              attr-dict-with-keyword\n              `=` `\\n` ` ` ` ` ` `\n              custom&lt;DispatchWorkgroupBody&gt;(ref(type($arguments)),\n              ref(type($results)),\n              $workgroup_body)\n              `` custom&lt;DispatchWorkgroupsCountRegion&gt;($workgroup_count)\n</code></pre> <p>Dispatches some number of workgroups across a 3-dimensional grid. 
The body region will be invoked for each workgroup with a unique <code>flow.dispatch.workgroup.id</code> in the range of <code>[0, flow.dispatch.workgroup.count)</code> (along each dimension XYZ).</p> <p>From the outside the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors.</p> <p>Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the <code>flow.dispatch.workgroup.*</code> ops to query the workgroup ID/count/size instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc).</p> <p>Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. 
The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it.</p> <pre><code>%r = flow.dispatch.workgroups[%c5, %c5](%0, %1)\n    : (tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt; =\n          (%arg0: !flow.dispatch.tensor&lt;readonly:tensor&lt;5x5xf32&gt;&gt;,\n           %arg1: !flow.dispatch.tensor&lt;readonly:tensor&lt;5xf32&gt;&gt;,\n           %arg2: !flow.dispatch.tensor&lt;writeonly:tensor&lt;5x5xf32&gt;&gt;) {\n  ...\n}\n</code></pre> <p>The number of results of the operation is equal to the number of results in the type signature (<code>(tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt;</code>). Each tensor argument and result in the type signature has a corresponding block argument of type <code>!flow.dispatch.tensor</code>. Furthermore, each argument has a corresponding <code>arguments</code> operand.</p> <p>There are no <code>arguments</code> operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, <code>-&gt; %0</code> would tie the first argument to the result. 
In that case, there would be no separate block argument for the result.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>ClosureOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_13","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_16","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowreturn-flowreturnop","title":"<code>flow.return</code> (Flow::ReturnOp)","text":"<p>Return from a flow.dispatch_region</p> <p>Syntax:</p> <pre><code>operation ::= `flow.return` attr-dict ($operands^ `:` type($operands))?\n</code></pre> <p>Returns the given values from the region and back to the host code.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_14","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Flow/#streamable-call-ops","title":"Streamable 
call ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowcall-flowcallop","title":"<code>flow.call</code> (Flow::CallOp)","text":"<p>Calls a streamable external host function</p> <p>Syntax:</p> <pre><code>operation ::= `flow.call` $callee\n              `(` $arguments `)` attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n</code></pre> <p>Calls a function taking/returning tensor values with stream semantics. Tensors have their shapes captured and may be tied to denote in-place operations. Asynchronous calls must have no side-effects.</p> <p>Note that returned tensors must have their shapes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the shape (avoid at all costs!) a separate func.call can be used outside of the stream to do so. If shapes are unknowable until the operation is performed it should be made as a normal asynchronous host call with 'coarse-fences' instead.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>CallOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_15","title":"Operands:","text":"Operand Description <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_17","title":"Results:","text":"Result Description <code>results</code> variadic of any 
type"},{"location":"reference/mlir-dialects/Flow/#flowfunc-flowfuncop","title":"<code>flow.func</code> (Flow::FuncOp)","text":"<p>Streamable function declaration</p> <p>Syntax:</p> <pre><code>operation ::= `flow.func` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              ``\n              custom&lt;ShapedFunctionSignature&gt;($function_type,\n              $tied_operands,\n              $arg_attrs,\n              $res_attrs)\n              attr-dict-with-keyword\n              ($body^)?\n</code></pre> <p>Declares a function that can be called as an asynchronous streaming operation via <code>flow.call</code>. Today only external functions are allowed.</p> <p>Traits: <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Flow/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_dag_root-flowdispatchworkgroupcountfromdagrootop","title":"<code>flow.dispatch.workgroup_count_from_dag_root</code> (Flow::DispatchWorkgroupCountFromDagRootOp)","text":"<p>Workgroup count computed based on iteration range of the root of the DAG     for ops within the dispatch.</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup_count_from_dag_root` attr-dict $operands\n</code></pre> <p>When using tile + distribution of the root of the 
DAG (Directed Acyclic Graph) of ops within the dispatch to split the work amongst workgroups, the workload captured is the size of the iteration space of the root of the DAG. This op represents the computation that given the workload returns the number of workgroups to use. The backends are responsible for lowering this op into actual computation (typically based on the tile sizes used to tile and distribute the root of the DAG).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_16","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_18","title":"Results:","text":"Result Description <code>x</code> index <code>y</code> index <code>z</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_slice-flowdispatchworkgroupcountfromsliceop","title":"<code>flow.dispatch.workgroup_count_from_slice</code> (Flow::DispatchWorkgroupCountFromSliceOp)","text":"<p>Placeholder to signify default workgroup count calculation.</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workgroup_count_from_slice` attr-dict $operands\n</code></pre> <p>The default computation of the number of workgroups (or workgroup count) assumes that the dispatch + captured values is enough to compute the workgroup count. It does so by using a program slice of the values within the dispatch that represent the number of workgroups when available within the dispatch. Currently the arguments of index types captured by the <code>flow.dispatch.workgroups</code> are treated as the workload for the operation. 
It is a requirement that the slice of the program that computes the number of workgroups will need to have its leaves be these captured values.</p> <p>TODO: This could be generalized in future to allow the slices to encompass arbitrary computation. The computation of the workgroup count can then be done on the device itself, if this is data dependent. In such cases the workload could be more than just values of index types.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_17","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_19","title":"Results:","text":"Result Description <code>x</code> index <code>y</code> index <code>z</code> index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkloadordinal-flowdispatchworkloadordinalop","title":"<code>flow.dispatch.workload.ordinal</code> (Flow::DispatchWorkloadOrdinalOp)","text":"<p>Annotates the values captured as workload within the body of     <code>flow.dispatch.workgroups</code> op.</p> <p>Syntax:</p> <pre><code>operation ::= `flow.dispatch.workload.ordinal` attr-dict $operand `,` $ordinal `:` type($operand)\n</code></pre> <p>The arguments that represent the captured/returned values of the <code>flow.dispatch.workgroups</code>, i.e. the signature of the body of the op, are not preserved during IREE's compilation. Since the workloads are derived from the operands captured by the operation, this op denotes the values captured as workloads. 
This can be used in the backends to map back to the workload values while materializing the workgroup count computation.</p> <p>TODO: Find a better way to represent this information, either by somehow propagating the signature of the created dispatch workgroup op through the compilation stack until the codegen backends, or as a separate list/attribute that can be plumbed through without using explicit ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>ordinal</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#operands_18","title":"Operands:","text":"Operand Description <code>operand</code> index"},{"location":"reference/mlir-dialects/Flow/#results_20","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Flow/#flowtensoralloca-flowtensorallocaop","title":"<code>flow.tensor.alloca</code> (Flow::TensorAllocaOp)","text":"<p>An empty tensor allocation with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.alloca` `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new transient tensor allocation with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read. The resulting tensor may be long-lived and allocated as part of a dedicated allocation. Prefer using <code>flow.tensor.empty</code> whenever possible as this op disables nearly all allocation-related optimizations performed by the compiler. 
The presence of this op is often an indication of an improper lowering.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_19","title":"Operands:","text":"Operand Description <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_21","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorbitcast-flowtensorbitcastop","title":"<code>flow.tensor.bitcast</code> (Flow::TensorBitCastOp)","text":"<p>Bitcasts a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.bitcast` $source `:`\n              type($source) (`{` $source_dims^ `}`)? `-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Bitcasts a tensor to a new type without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_20","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_22","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorclone-flowtensorcloneop","title":"<code>flow.tensor.clone</code> 
(Flow::TensorCloneOp)","text":"<p>Performs a full tensor clone operation</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.clone` $operand `:` type($result) (`{` $argument_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones the input tensor into an identical output tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_21","title":"Operands:","text":"Operand Description <code>operand</code> ranked tensor of any type values <code>argument_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_23","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorconstant-flowtensorconstantop","title":"<code>flow.tensor.constant</code> (Flow::TensorConstantOp)","text":"<p>Tensor constant that can have dynamic dimensions</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.constant` $value attr-dict `-&gt;` type($result)\n</code></pre> <p>Allows specifying a constant where the return value can erase shape information. This operation is declared as having side effects and has no folder, so will not be optimized away by the compiler. 
The underlying shape information should be hidden from the compiler and resolved at runtime.</p> <pre><code>%c = flow.tensor.constant tensor&lt;2x2xf32&gt; -&gt; tensor&lt;?x?xf32&gt;\n%res = math.absf %c : tensor&lt;?x?xf32&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/Flow/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Flow/#results_24","title":"Results:","text":"Result Description <code>result</code> tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorempty-flowtensoremptyop","title":"<code>flow.tensor.empty</code> (Flow::TensorEmptyOp)","text":"<p>An empty tensor carrying metadata but no contents</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.empty` `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_22","title":"Operands:","text":"Operand Description <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_25","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorload-flowtensorloadop","title":"<code>flow.tensor.load</code> (Flow::TensorLoadOp)","text":"<p>Loads a value from a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.load` $source (`[` $indices^ `]`)? 
`:`\n              type($source) (`{` $source_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_23","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_26","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorreshape-flowtensorreshapeop","title":"<code>flow.tensor.reshape</code> (Flow::TensorReshapeOp)","text":"<p>Reshapes a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.reshape` $source `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Reshapes a tensor to a new shape without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_24","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_27","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorslice-flowtensorsliceop","title":"<code>flow.tensor.slice</code> (Flow::TensorSliceOp)","text":"<p>Slices out a subregion of a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones a subregion of a tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_25","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_28","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorsplat-flowtensorsplatop","title":"<code>flow.tensor.splat</code> (Flow::TensorSplatOp)","text":"<p>Splats a value into a shaped tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor initialized to the given primitive value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_26","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_29","title":"Results:","text":"Result 
Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorstore-flowtensorstoreop","title":"<code>flow.tensor.store</code> (Flow::TensorStoreOp)","text":"<p>Stores a value into a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n              type($target) (`{` $target_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with the element at the given index set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_27","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type or vector of any type values <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_30","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortie_shape-flowtensortieshapeop","title":"<code>flow.tensor.tie_shape</code> (Flow::TensorTieShapeOp)","text":"<p>Ties a runtime shape to a tensor value</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.tie_shape` $operand attr-dict\n              `:` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Metadata op used to tie tensors with their runtime-computed dynamic dimensions. 
This only exists transiently in the IR as a witness to shape calculations and is removed during lowering.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_28","title":"Operands:","text":"Operand Description <code>operand</code> ranked tensor of any type values <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_31","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortrace-flowtensortraceop","title":"<code>flow.tensor.trace</code> (Flow::TensorTraceOp)","text":"<p>Traces one or more tensor values at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.trace` $key `=` `[`\n              custom&lt;ShapedOperandList&gt;($values, type($values), $value_dims)\n              `]` attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given tensors. 
The key is arbitrary and can be used for identifying the set of values being traced.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ShapeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/Flow/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#operands_29","title":"Operands:","text":"Operand Description <code>values</code> variadic of ranked tensor of any type values <code>value_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowtensorupdate-flowtensorupdateop","title":"<code>flow.tensor.update</code> (Flow::TensorUpdateOp)","text":"<p>Updates a tensor with the contents of another tensor</p> <p>Syntax:</p> <pre><code>operation ::= `flow.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n              type($update) (`{` $update_dims^ `}`)? `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims)\n              attr-dict-with-keyword\n</code></pre> <p>Updates the target tensor with the contents of the update tensor at the given offset indices.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>HoistableOpInterface</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Flow/#operands_30","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>start_indices</code> variadic of index <code>update</code> ranked tensor of any type values <code>update_dims</code> variadic of 
index"},{"location":"reference/mlir-dialects/Flow/#results_32","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#attributes_20","title":"Attributes","text":""},{"location":"reference/mlir-dialects/Flow/#dummyattr","title":"DummyAttr","text":"<p>Syntax: <code>#flow.dummy</code></p>"},{"location":"reference/mlir-dialects/Flow/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/Flow/#dispatchtensor","title":"dispatch.tensor","text":"<p>A placeholder for a dispatch region input/output operand. This can be used to query the metadata about the tensor (such as its shape) as well as both load and store from the backing tensor representation.</p>"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_1","title":"dispatch.tensor","text":"<p>A placeholder for a dispatch region input operand. This can be used to query the metadata about the tensor (such as its shape) as well as load from the backing tensor representation.</p>"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_2","title":"dispatch.tensor","text":"<p>A placeholder for a dispatch region output operand. This can be used to query the metadata about the tensor (such as its shape) as well as store to the backing tensor representation.</p>"},{"location":"reference/mlir-dialects/Flow/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/Flow/#channeltype","title":"ChannelType","text":"<p>a collective communication channel</p> <p>Syntax: <code>!flow.channel</code></p> <p>Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.</p> <p>In programs that have already been partitioned prior to being compiled there will often exist only one channel and <code>flow.channel.default</code> can be used to reference it. 
In programs that model SPMD behavior internally channels can be created or provided by hosting applications.</p>"},{"location":"reference/mlir-dialects/Flow/#dummytype","title":"DummyType","text":"<p>Syntax: <code>!flow.dummy</code></p>"},{"location":"reference/mlir-dialects/HAL/","title":"HAL","text":""},{"location":"reference/mlir-dialects/HAL/#hal-dialect","title":"'hal' Dialect","text":"<p>A dialect representing operations against the IREE HAL.</p> <p>This can be thought of as a Vulkan-like model with all of the graphics bits chopped out.</p> <p>The type set is limited to those that can be represented in the IREE HAL design: buffers and views, synchronization primitives like semaphores, and command buffers. The intent is that if a device could implement the HAL interface the sequencer ops could run on that device, such as being able to run on a GPU via indirect command buffers.</p> <p>Though this is mostly a 1:1 mapping to the iree::hal API there are some methods omitted as they are not likely to be needed in IR. 
It's assumed that either sequencer interfaces will encapsulate the logic (such as device resolution) or that certain features are unsafe to expose to user-defined input.</p> <ul> <li>'hal' Dialect<ul> <li>Operations<ul> <li>Allocator ops<ul> <li>hal.allocator.allocate (HAL::AllocatorAllocateOp)</li> <li>hal.allocator.import (HAL::AllocatorImportOp)</li> </ul> </li> <li>Buffer ops<ul> <li>hal.buffer.assert (HAL::BufferAssertOp)</li> <li>hal.buffer.length (HAL::BufferLengthOp)</li> <li>hal.buffer.load (HAL::BufferLoadOp)</li> <li>hal.buffer.store (HAL::BufferStoreOp)</li> <li>hal.buffer.subspan (HAL::BufferSubspanOp)</li> </ul> </li> <li>Buffer view ops<ul> <li>hal.buffer_view.assert (HAL::BufferViewAssertOp)</li> <li>hal.buffer_view.buffer (HAL::BufferViewBufferOp)</li> <li>hal.buffer_view.create (HAL::BufferViewCreateOp)</li> <li>hal.buffer_view.dim (HAL::BufferViewDimOp)</li> <li>hal.buffer_view.element_type (HAL::BufferViewElementTypeOp)</li> <li>hal.buffer_view.encoding_type (HAL::BufferViewEncodingTypeOp)</li> <li>hal.buffer_view.rank (HAL::BufferViewRankOp)</li> <li>hal.buffer_view.trace (HAL::BufferViewTraceOp)</li> <li>hal.element_type (HAL::ElementTypeOp)</li> <li>hal.encoding_type (HAL::EncodingTypeOp)</li> </ul> </li> <li>Channel ops<ul> <li>hal.channel.create (HAL::ChannelCreateOp)</li> <li>hal.channel.rank_and_count (HAL::ChannelRankAndCountOp)</li> <li>hal.channel.split (HAL::ChannelSplitOp)</li> </ul> </li> <li>Command buffer ops<ul> <li>hal.command_buffer.begin_debug_group (HAL::CommandBufferBeginDebugGroupOp)</li> <li>hal.command_buffer.collective (HAL::CommandBufferCollectiveOp)</li> <li>hal.command_buffer.copy_buffer (HAL::CommandBufferCopyBufferOp)</li> <li>hal.command_buffer.create (HAL::CommandBufferCreateOp)</li> <li>hal.command_buffer.device (HAL::CommandBufferDeviceOp)</li> <li>hal.command_buffer.dispatch.indirect (HAL::CommandBufferDispatchIndirectOp)</li> <li>hal.command_buffer.dispatch.indirect.symbol 
(HAL::CommandBufferDispatchIndirectSymbolOp)</li> <li>hal.command_buffer.dispatch (HAL::CommandBufferDispatchOp)</li> <li>hal.command_buffer.dispatch.symbol (HAL::CommandBufferDispatchSymbolOp)</li> <li>hal.command_buffer.end_debug_group (HAL::CommandBufferEndDebugGroupOp)</li> <li>hal.command_buffer.execution_barrier (HAL::CommandBufferExecutionBarrierOp)</li> <li>hal.command_buffer.fill_buffer (HAL::CommandBufferFillBufferOp)</li> <li>hal.command_buffer.finalize (HAL::CommandBufferFinalizeOp)</li> <li>hal.command_buffer.push_constants (HAL::CommandBufferPushConstantsOp)</li> <li>hal.command_buffer.push_descriptor_set (HAL::CommandBufferPushDescriptorSetOp)</li> </ul> </li> <li>Descriptor set layout ops<ul> <li>hal.descriptor_set_layout.create (HAL::DescriptorSetLayoutCreateOp)</li> </ul> </li> <li>Device management ops<ul> <li>hal.devices.count (HAL::DevicesCountOp)</li> <li>hal.devices.get (HAL::DevicesGetOp)</li> </ul> </li> <li>Device ops<ul> <li>hal.device.allocator (HAL::DeviceAllocatorOp)</li> <li>hal.device.query (HAL::DeviceQueryOp)</li> <li>hal.device.queue.alloca (HAL::DeviceQueueAllocaOp)</li> <li>hal.device.queue.dealloca (HAL::DeviceQueueDeallocaOp)</li> <li>hal.device.queue.execute (HAL::DeviceQueueExecuteOp)</li> <li>hal.device.queue.flush (HAL::DeviceQueueFlushOp)</li> <li>hal.device.queue.read (HAL::DeviceQueueReadOp)</li> <li>hal.device.queue.write (HAL::DeviceQueueWriteOp)</li> <li>hal.return (HAL::ReturnOp)</li> </ul> </li> <li>Executable ops<ul> <li>hal.executable.binary (HAL::ExecutableBinaryOp)</li> <li>hal.executable.calculate_workgroups (HAL::ExecutableCalculateWorkgroupsOp)</li> <li>hal.executable.condition (HAL::ExecutableConditionOp)</li> <li>hal.executable.constant.block (HAL::ExecutableConstantBlockOp)</li> <li>hal.executable.constant.load (HAL::ExecutableConstantLoadOp)</li> <li>hal.executable.create (HAL::ExecutableCreateOp)</li> <li>hal.executable_end (HAL::ExecutableEndOp)</li> <li>hal.executable.export 
(HAL::ExecutableExportOp)</li> <li>hal.executable.lookup (HAL::ExecutableLookupOp)</li> <li>hal.executable (HAL::ExecutableOp)</li> <li>hal.executable.source_end (HAL::ExecutableSourceEndOp)</li> <li>hal.executable.source (HAL::ExecutableSourceOp)</li> <li>hal.executable.variant_end (HAL::ExecutableVariantEndOp)</li> <li>hal.executable.variant (HAL::ExecutableVariantOp)</li> </ul> </li> <li>Experimental ops<ul> <li>hal.ex.file.from_memory (HAL::ExFileFromMemoryOp)</li> </ul> </li> <li>Fence ops<ul> <li>hal.fence.await (HAL::FenceAwaitOp)</li> <li>hal.fence.create (HAL::FenceCreateOp)</li> <li>hal.fence.fail (HAL::FenceFailOp)</li> <li>hal.fence.join (HAL::FenceJoinOp)</li> <li>hal.fence.query (HAL::FenceQueryOp)</li> <li>hal.fence.signal (HAL::FenceSignalOp)</li> </ul> </li> <li>Instrument ops<ul> <li>hal.instrument.memory.load (HAL::InstrumentMemoryLoadOp)</li> <li>hal.instrument.memory.store (HAL::InstrumentMemoryStoreOp)</li> <li>hal.instrument.print (HAL::InstrumentPrintOp)</li> <li>hal.instrument.value (HAL::InstrumentValueOp)</li> <li>hal.instrument.workgroup (HAL::InstrumentWorkgroupOp)</li> </ul> </li> <li>Interface ops<ul> <li>hal.interface.binding.subspan (HAL::InterfaceBindingSubspanOp)</li> <li>hal.interface.constant.load (HAL::InterfaceConstantLoadOp)</li> <li>hal.interface.workgroup.count (HAL::InterfaceWorkgroupCountOp)</li> <li>hal.interface.workgroup.id (HAL::InterfaceWorkgroupIDOp)</li> <li>hal.interface.workgroup.size (HAL::InterfaceWorkgroupSizeOp)</li> </ul> </li> <li>Pipeline layout ops<ul> <li>hal.pipeline_layout.create (HAL::PipelineLayoutCreateOp)</li> <li>hal.pipeline_layout.lookup (HAL::PipelineLayoutLookupOp)</li> </ul> </li> <li>Pseudo Ops<ul> <li>hal.dispatch.extern (HAL::DispatchExternOp)</li> <li>hal.tensor.barrier (HAL::TensorBarrierOp)</li> <li>hal.tensor.export (HAL::TensorExportOp)</li> <li>hal.tensor.import (HAL::TensorImportOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>AffinityQueueAttr</li> <li>CollectiveAttr</li> 
<li>DescriptorSetBindingAttr</li> <li>DescriptorSetLayoutAttr</li> <li>DescriptorTypeAttr</li> <li>DeviceTargetAttr</li> <li>ExecutableObjectAttr</li> <li>ExecutableObjectsAttr</li> <li>ExecutableTargetAttr</li> <li>InterfaceBindingAttr</li> <li>PipelineLayoutAttr</li> </ul> </li> <li>Type constraints<ul> <li>allocator</li> <li>buffer</li> <li>buffer_view</li> <li>collective.channel</li> <li>command_buffer</li> <li>descriptor_set_layout</li> <li>device</li> <li>event</li> <li>executable</li> <li>fence</li> <li>buffer</li> <li>pipeline_layout</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/HAL/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/HAL/#allocator-ops","title":"Allocator ops","text":"<p>Ops for <code>!hal.allocator</code> / <code>iree_hal_allocator_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halallocatorallocate-halallocatorallocateop","title":"<code>hal.allocator.allocate</code> (HAL::AllocatorAllocateOp)","text":"<p>Empty buffer allocation operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.allocator.allocate` `&lt;` $allocator `:` type($allocator) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), $result_size)\n              attr-dict-with-keyword\n</code></pre> <p>Allocates a buffer of the given size from the allocator. 
The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands","title":"Operands:","text":"Operand Description <code>allocator</code> allocator <code>queue_affinity</code> 64-bit signless integer <code>result_size</code> index"},{"location":"reference/mlir-dialects/HAL/#results","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#halallocatorimport-halallocatorimportop","title":"<code>hal.allocator.import</code> (HAL::AllocatorImportOp)","text":"<p>Allocator-supported host buffer import operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.allocator.import` `&lt;` $allocator `:` type($allocator) `&gt;`\n              `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n              `affinity` `(` $queue_affinity `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              `:` type($did_import) `,` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Tries importing host memory backed by the given byte buffer into a device accessible <code>!hal.buffer</code>. The returned buffer may be host-only and not directly usable on devices. 
If the mapping cannot be completed (such as trying to map the host memory as device-local on devices with discrete memory) then <code>did_import</code> will indicate that the returned buffer is null.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_1","title":"Operands:","text":"Operand Description <code>allocator</code> allocator <code>queue_affinity</code> 64-bit signless integer <code>source</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#results_1","title":"Results:","text":"Result Description <code>did_import</code> 1-bit signless integer <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-ops","title":"Buffer ops","text":"<p>Ops for <code>!hal.buffer</code> / <code>iree_hal_buffer_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halbufferassert-halbufferassertop","title":"<code>hal.buffer.assert</code> (HAL::BufferAssertOp)","text":"<p>Buffer compatibility assertion</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.assert` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `message` `(` $message `)`\n              `allocator` `(` $allocator `:` type($allocator) `)`\n              `minimum_length` `(` $minimum_length `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              attr-dict-with-keyword\n</code></pre> <p>Asserts that the buffer is compatible with the given allocator and usage. 
Program execution will abort as if <code>std.assert</code> had been used.</p> <p>This only checks that the buffer can be used and not that it matches the given parameters exactly. Buffers may be from other allocators so long as the allocators are compatible (devices can address each other's memory), the type and usage contain all the requested bits (having more bits is ok), and the length is at least the requested minimum (as padding may be ignored).</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_2","title":"Operands:","text":"Operand Description <code>buffer</code> buffer <code>allocator</code> allocator <code>minimum_length</code> index"},{"location":"reference/mlir-dialects/HAL/#halbufferlength-halbufferlengthop","title":"<code>hal.buffer.length</code> (HAL::BufferLengthOp)","text":"<p>Buffer byte length accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.length` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the allocated size of a buffer in bytes. 
May be less than the underlying buffer allocation if this is a subspan or view into another buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_3","title":"Operands:","text":"Operand Description <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HAL/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halbufferload-halbufferloadop","title":"<code>hal.buffer.load</code> (HAL::BufferLoadOp)","text":"<p>Buffer element load operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.load` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Loads a value from a buffer by mapping it.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_4","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#results_3","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/HAL/#halbufferstore-halbufferstoreop","title":"<code>hal.buffer.store</code> (HAL::BufferStoreOp)","text":"<p>Buffer element store operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.store` `&lt;` $target_buffer `:` type($target_buffer) `&gt;`\n              
`` `[` $target_offset `]`\n              `value` `(` $value `:` type($value) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Stores a value into a buffer by mapping it.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_5","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type or vector of any type values <code>target_buffer</code> buffer <code>target_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#halbuffersubspan-halbuffersubspanop","title":"<code>hal.buffer.subspan</code> (HAL::BufferSubspanOp)","text":"<p>Buffer subspan operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer.subspan` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a reference to a subspan of the buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_6","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#results_4","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-view-ops","title":"Buffer view ops","text":"<p>Ops for <code>!hal.buffer_view</code> / <code>iree_hal_buffer_view_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewassert-halbufferviewassertop","title":"<code>hal.buffer_view.assert</code> (HAL::BufferViewAssertOp)","text":"<p>Buffer view contents assertion</p> <p>Syntax:</p> <pre><code>operation ::= 
`hal.buffer_view.assert` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `message` `(` $message `)`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              attr-dict-with-keyword\n</code></pre> <p>Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if <code>std.assert</code> had been used.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_7","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewbuffer-halbufferviewbufferop","title":"<code>hal.buffer_view.buffer</code> (HAL::BufferViewBufferOp)","text":"<p>Buffer view buffer accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.buffer` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the buffer backing this view's contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_8","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_5","title":"Results:","text":"Result Description <code>result</code> 
buffer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewcreate-halbufferviewcreateop","title":"<code>hal.buffer_view.create</code> (HAL::BufferViewCreateOp)","text":"<p>Buffer view reference initializer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `,` $source_length `]`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_9","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>source_length</code> index <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_6","title":"Results:","text":"Result Description <code>result</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewdim-halbufferviewdimop","title":"<code>hal.buffer_view.dim</code> (HAL::BufferViewDimOp)","text":"<p>Buffer view dimension value query</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.dim` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `` `[` $index `]`\n              `:` type($result)\n              
attr-dict-with-keyword\n</code></pre> <p>Returns the value of the given dimension.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_10","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_7","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewelement_type-halbufferviewelementtypeop","title":"<code>hal.buffer_view.element_type</code> (HAL::BufferViewElementTypeOp)","text":"<p>Buffer view element type query</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.element_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_11","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_8","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewencoding_type-halbufferviewencodingtypeop","title":"<code>hal.buffer_view.encoding_type</code> (HAL::BufferViewEncodingTypeOp)","text":"<p>Buffer view encoding type query</p> <p>Syntax:</p> 
<pre><code>operation ::= `hal.buffer_view.encoding_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the encoding type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_12","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_9","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewrank-halbufferviewrankop","title":"<code>hal.buffer_view.rank</code> (HAL::BufferViewRankOp)","text":"<p>Buffer view rank query</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.rank` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_13","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_10","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewtrace-halbufferviewtraceop","title":"<code>hal.buffer_view.trace</code> (HAL::BufferViewTraceOp)","text":"<p>Trace value(s) operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.buffer_view.trace` $key `=`\n              $operands `:` type($operands)\n     
         attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. The key is informational only and useful for titling/marking specific sets of buffers for easier searching.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_14","title":"Operands:","text":"Operand Description <code>operands</code> variadic of buffer_view"},{"location":"reference/mlir-dialects/HAL/#halelement_type-halelementtypeop","title":"<code>hal.element_type</code> (HAL::ElementTypeOp)","text":"<p>An iree_hal_element_type_t for the given MLIR type</p> <p>Syntax:</p> <pre><code>operation ::= `hal.element_type` `&lt;` $type `&gt;`\n              attr-dict\n              `:` type($result)\n</code></pre> <p>Maps an MLIR type to a runtime <code>iree_hal_element_type_t</code> value for all types that are convertible.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>type</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/HAL/#results_11","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halencoding_type-halencodingtypeop","title":"<code>hal.encoding_type</code> (HAL::EncodingTypeOp)","text":"<p>An iree_hal_encoding_type_t for the given MLIR encoding</p> <p>Syntax:</p> <pre><code>operation ::= `hal.encoding_type` `&lt;` ($encoding^):( `` `dense_row_major`)? 
`&gt;`\n              attr-dict\n              `:` type($result)\n</code></pre> <p>Maps an MLIR encoding to a runtime <code>iree_hal_encoding_type_t</code> value for all encodings that are convertible.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>encoding</code>::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/HAL/#results_12","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#channel-ops","title":"Channel ops","text":"<p>Ops for <code>!hal.channel</code> / <code>iree_hal_channel_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halchannelcreate-halchannelcreateop","title":"<code>hal.channel.create</code> (HAL::ChannelCreateOp)","text":"<p>Creates a new channel for collective communication</p> <p>Syntax:</p> <pre><code>operation ::= `hal.channel.create` `device` `(` $device `:` type($device) `)`\n              `affinity` `(` $queue_affinity `)`\n              `flags` `(` $flags `)`\n              `id` `(` $id `)`\n              `group` `(` $group `)`\n              `rank` `(` $rank `)`\n              `count` `(` $count `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new channel with the given rank associated with the given device queue. Collective operations using this channel must only be submitted on compatible queues.</p> <p>The group and ID are optional and may be null. 
A rank or count of -1 can be used to indicate a default inherited from the environment or device configuration.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_15","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>id</code> a reference counted byte buffer <code>group</code> a reference counted byte buffer <code>rank</code> 32-bit signless integer <code>count</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_13","title":"Results:","text":"Result Description <code>result</code> collective.channel"},{"location":"reference/mlir-dialects/HAL/#halchannelrank_and_count-halchannelrankandcountop","title":"<code>hal.channel.rank_and_count</code> (HAL::ChannelRankAndCountOp)","text":"<p>Returns the rank of the local participant in the group</p> <p>Syntax:</p> <pre><code>operation ::= `hal.channel.rank_and_count` `&lt;` $channel `:` type($channel) `&gt;`\n              `:` type($rank) `,` type($count)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank the channel represents as a participant in a collective group in <code>[0, count)</code> and the total participant count.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_16","title":"Operands:","text":"Operand Description <code>channel</code> collective.channel"},{"location":"reference/mlir-dialects/HAL/#results_14","title":"Results:","text":"Result Description <code>rank</code> 32-bit signless 
integer <code>count</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halchannelsplit-halchannelsplitop","title":"<code>hal.channel.split</code> (HAL::ChannelSplitOp)","text":"<p>Splits a collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `hal.channel.split` `&lt;` $channel `:` type($channel) `&gt;`\n              `color` `(` $color `)`\n              `key` `(` $key `)`\n              `flags` `(` $flags `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_17","title":"Operands:","text":"Operand Description <code>channel</code> collective.channel <code>color</code> 32-bit signless integer <code>key</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_15","title":"Results:","text":"Result Description <code>result</code> collective.channel"},{"location":"reference/mlir-dialects/HAL/#command-buffer-ops","title":"Command buffer ops","text":"<p>Ops for <code>!hal.command_buffer</code> / <code>iree_hal_command_buffer_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferbegin_debug_group-halcommandbufferbegindebuggroupop","title":"<code>hal.command_buffer.begin_debug_group</code> 
(HAL::CommandBufferBeginDebugGroupOp)","text":"<p>Pushes a command buffer debug group label</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.begin_debug_group` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `label` `(` $label `)`\n              attr-dict-with-keyword\n</code></pre> <p>Pushes a new debug group with the given label. All commands between this and a mandatory matching call to <code>hal.command_buffer.end_debug_group</code> will be grouped together with the given label.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>label</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_18","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercollective-halcommandbuffercollectiveop","title":"<code>hal.command_buffer.collective</code> (HAL::CommandBufferCollectiveOp)","text":"<p>Command buffer collective dispatch recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.collective` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `channel` `(` $channel `:` type($channel) `)`\n              `op` `(` $op `)`\n              (`param` `(` $param^ `:` type($param) `)`)?\n              (`send` `(` $send_buffer^ `:` type($send_buffer) `)`\n              `` `[` $send_offset `,` $send_length `]`)?\n              (`recv` `(` $recv_buffer^ `:` type($recv_buffer) `)`\n              `` `[` $recv_offset `,` $recv_length `]`)?\n              `count` `(` $element_count `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches a collective operation defined by op using the given buffers.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription 
<code>op</code>::mlir::iree_compiler::IREE::HAL::CollectiveAttrcollective operation and specification"},{"location":"reference/mlir-dialects/HAL/#operands_19","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>channel</code> collective.channel <code>element_count</code> index <code>param</code> 32-bit signless integer <code>send_buffer</code> buffer <code>send_offset</code> index <code>send_length</code> index <code>recv_buffer</code> buffer <code>recv_offset</code> index <code>recv_length</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercopy_buffer-halcommandbuffercopybufferop","title":"<code>hal.command_buffer.copy_buffer</code> (HAL::CommandBufferCopyBufferOp)","text":"<p>Command buffer buffer copy recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.copy_buffer` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `source` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `]`\n              `target` `(` $target_buffer `:` type($target_buffer) `)`\n              `` `[` $target_offset `]`\n              `length` `(` $length `)`\n              attr-dict-with-keyword\n</code></pre> <p>Copies a range of one buffer to another.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_20","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>source_buffer</code> buffer <code>source_offset</code> index <code>target_buffer</code> buffer <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercreate-halcommandbuffercreateop","title":"<code>hal.command_buffer.create</code> (HAL::CommandBufferCreateOp)","text":"<p>Command buffer allocation operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.create` `device` `(` $device `:` type($device) `)`\n              `mode` `(` $modes `)`\n              
`categories` `(` $command_categories `)`\n              (`bindings` `(` $binding_capacity^ `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a command buffer from the device pool ready to begin recording.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>modes</code>mlir::iree_compiler::IREE::HAL::CommandBufferModeBitfieldAttrvalid CommandBufferMode <code>command_categories</code>mlir::iree_compiler::IREE::HAL::CommandCategoryBitfieldAttrvalid CommandCategory"},{"location":"reference/mlir-dialects/HAL/#operands_21","title":"Operands:","text":"Operand Description <code>device</code> device <code>binding_capacity</code> index"},{"location":"reference/mlir-dialects/HAL/#results_16","title":"Results:","text":"Result Description <code>result</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdevice-halcommandbufferdeviceop","title":"<code>hal.command_buffer.device</code> (HAL::CommandBufferDeviceOp)","text":"<p>Command buffer device query operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.device` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `:` type($device)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to access the device used to create a command buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_22","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#results_17","title":"Results:","text":"Result Description <code>device</code> 
device"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirect-halcommandbufferdispatchindirectop","title":"<code>hal.command_buffer.dispatch.indirect</code> (HAL::CommandBufferDispatchIndirectOp)","text":"<p>Command buffer indirect dispatch recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch.indirect` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $executable `:` type($executable) `)`\n              `` `[` $entry_point `]`\n              `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n              `` `[` $workgroups_offset `]`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request with the dispatch parameters loaded from the given buffer.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_23","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>executable</code> executable <code>workgroups_buffer</code> buffer <code>workgroups_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirectsymbol-halcommandbufferdispatchindirectsymbolop","title":"<code>hal.command_buffer.dispatch.indirect.symbol</code> (HAL::CommandBufferDispatchIndirectSymbolOp)","text":"<p>Command buffer indirect dispatch recording operation, using symbolref</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch.indirect.symbol` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $entry_point `)`\n              `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n              `` `[` $workgroups_offset `]`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request with the dispatch parameters loaded 
from the given buffer, using a nested symbol reference to the entry point.</p> <pre><code>hal.command_buffer.dispatch.indirect.symbol %cmd, @executable::@target::@entry,\n                                            workgroups = %buffer[%offset]\n</code></pre>"},{"location":"reference/mlir-dialects/HAL/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_24","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>workgroups_buffer</code> buffer <code>workgroups_offset</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatch-halcommandbufferdispatchop","title":"<code>hal.command_buffer.dispatch</code> (HAL::CommandBufferDispatchOp)","text":"<p>Command buffer dispatch recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $executable `:` type($executable) `)`\n              `` `[` $entry_point `]`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_25","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>executable</code> executable <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> 
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchsymbol-halcommandbufferdispatchsymbolop","title":"<code>hal.command_buffer.dispatch.symbol</code> (HAL::CommandBufferDispatchSymbolOp)","text":"<p>Command buffer dispatch recording operation, using symbolref</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.dispatch.symbol` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $entry_point `)`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches an execution request, using a nested symbol reference to the entry point.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_26","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferend_debug_group-halcommandbufferenddebuggroupop","title":"<code>hal.command_buffer.end_debug_group</code> (HAL::CommandBufferEndDebugGroupOp)","text":"<p>Pops a command buffer debug group label</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.end_debug_group` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              attr-dict-with-keyword\n</code></pre> <p>Pops a debug group from the stack.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_27","title":"Operands:","text":"Operand Description <code>command_buffer</code> 
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferexecution_barrier-halcommandbufferexecutionbarrierop","title":"<code>hal.command_buffer.execution_barrier</code> (HAL::CommandBufferExecutionBarrierOp)","text":"<p>Command buffer execution barrier recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.execution_barrier` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `source` `(` $source_stage_mask `)`\n              `target` `(` $target_stage_mask `)`\n              `flags` `(` $flags `)`\n              attr-dict-with-keyword\n</code></pre> <p>Defines an execution dependency between all commands recorded before the barrier and all commands recorded after the barrier. Only the stages provided will be affected.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_stage_mask</code>mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage <code>target_stage_mask</code>mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage <code>flags</code>mlir::iree_compiler::IREE::HAL::ExecutionBarrierFlagBitfieldAttrvalid ExecutionBarrierFlag"},{"location":"reference/mlir-dialects/HAL/#operands_28","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfill_buffer-halcommandbufferfillbufferop","title":"<code>hal.command_buffer.fill_buffer</code> (HAL::CommandBufferFillBufferOp)","text":"<p>Command buffer buffer fill recording operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.fill_buffer` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `target` `(` $target_buffer `:` type($target_buffer) `)`\n              `` `[` $target_offset `,` $length `]`\n              `pattern` `(` $pattern `:` type($pattern) `)`\n              
attr-dict-with-keyword\n</code></pre> <p>Fills the target buffer with the given repeating value.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_29","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>target_buffer</code> buffer <code>target_offset</code> index <code>length</code> index <code>pattern</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfinalize-halcommandbufferfinalizeop","title":"<code>hal.command_buffer.finalize</code> (HAL::CommandBufferFinalizeOp)","text":"<p>Finalizes command buffer recording</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.finalize` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              attr-dict-with-keyword\n</code></pre> <p>Ends recording into the command buffer and prepares it for submission. No more commands may be recorded into the command buffer.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_30","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_constants-halcommandbufferpushconstantsop","title":"<code>hal.command_buffer.push_constants</code> (HAL::CommandBufferPushConstantsOp)","text":"<p>Command buffer push constants operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.push_constants` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n              `offset` `(` $offset `)`\n              `values` `(` `[` $values `]` `)`\n              `:` type($values)\n              attr-dict-with-keyword\n</code></pre> <p>Pushes an inline set of constants that can be accessed by subsequent dispatches using a compatible pipeline layout.</p> <p>Push constants are always 4-byte values and treated as opaque, meaning that they may be bit-casted 
floats, bit-packed booleans, etc.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>offset</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_31","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>pipeline_layout</code> pipeline_layout <code>values</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_descriptor_set-halcommandbufferpushdescriptorsetop","title":"<code>hal.command_buffer.push_descriptor_set</code> (HAL::CommandBufferPushDescriptorSetOp)","text":"<p>Command buffer descriptor set push binding operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.command_buffer.push_descriptor_set` `&lt;` $command_buffer `:` type($command_buffer) `&gt;`\n              `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n              `` `[` $set `]`\n              `bindings` `(` `[`\n              custom&lt;DescriptorSetBindings&gt;($binding_ordinals,\n              $binding_buffers,\n              type($binding_buffers),\n              $binding_offsets,\n              $binding_lengths)\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Pushes an inline-defined descriptor set to the command buffer. 
The provided buffers may either be HAL buffers or indirect references into the command buffer binding table.</p> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_32","title":"Operands:","text":"Operand Description <code>command_buffer</code> command_buffer <code>pipeline_layout</code> pipeline_layout <code>set</code> index <code>binding_ordinals</code> variadic of index <code>binding_buffers</code> variadic of index or buffer <code>binding_offsets</code> variadic of index <code>binding_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#descriptor-set-layout-ops","title":"Descriptor set layout ops","text":"<p>Ops for <code>!hal.descriptor_set_layout</code> / <code>iree_hal_descriptor_set_layout_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#haldescriptor_set_layoutcreate-haldescriptorsetlayoutcreateop","title":"<code>hal.descriptor_set_layout.create</code> (HAL::DescriptorSetLayoutCreateOp)","text":"<p>Creates a descriptor set layout</p> <p>Syntax:</p> <pre><code>operation ::= `hal.descriptor_set_layout.create` `device` `(` $device `:` type($device) `)`\n              `flags` `(` $flags `)`\n              `bindings` `(` $bindings `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a descriptor set layout that defines the bindings used within a set. 
The same descriptor set layout may be shared with many different executable layouts and by doing so some runtime binding overhead when switching between executables that use the same set layouts can be reduced.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlagsAttrvalid DescriptorSetLayout flags <code>bindings</code>::mlir::ArrayAttrHAL descriptor set layout binding array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_33","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_18","title":"Results:","text":"Result Description <code>result</code> descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#device-management-ops","title":"Device management ops","text":"<p>Device availability and selection support.</p>"},{"location":"reference/mlir-dialects/HAL/#haldevicescount-haldevicescountop","title":"<code>hal.devices.count</code> (HAL::DevicesCountOp)","text":"<p>Returns the number of available devices</p> <p>Syntax:</p> <pre><code>operation ::= `hal.devices.count` attr-dict `:` type($result)\n</code></pre> <p>Returns the total number of available devices registered at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#results_19","title":"Results:","text":"Result Description <code>result</code> 
index"},{"location":"reference/mlir-dialects/HAL/#haldevicesget-haldevicesgetop","title":"<code>hal.devices.get</code> (HAL::DevicesGetOp)","text":"<p>Returns the device with the given index</p> <p>Syntax:</p> <pre><code>operation ::= `hal.devices.get` $index attr-dict `:` type($result)\n</code></pre> <p>Returns the device with the given index in the [0, hal.devices.count) range. Devices may be lazily initialized upon first use.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_34","title":"Operands:","text":"Operand Description <code>index</code> index"},{"location":"reference/mlir-dialects/HAL/#results_20","title":"Results:","text":"Result Description <code>result</code> device"},{"location":"reference/mlir-dialects/HAL/#device-ops","title":"Device ops","text":"<p>Ops for <code>!hal.device</code> / <code>iree_hal_device_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#haldeviceallocator-haldeviceallocatorop","title":"<code>hal.device.allocator</code> (HAL::DeviceAllocatorOp)","text":"<p>Device allocator accessor operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.allocator` `&lt;` $device `:` type($device) `&gt;` `:` type($result) attr-dict-with-keyword\n</code></pre> <p>Returns the allocator that can be used to allocate buffers compatible with the device.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_35","title":"Operands:","text":"Operand Description <code>device</code> 
device"},{"location":"reference/mlir-dialects/HAL/#results_21","title":"Results:","text":"Result Description <code>result</code> allocator"},{"location":"reference/mlir-dialects/HAL/#haldevicequery-haldevicequeryop","title":"<code>hal.device.query</code> (HAL::DeviceQueryOp)","text":"<p>Returns a runtime configuration parameter from the device</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.query` `&lt;` $device `:` type($device) `&gt;`\n              `key` `(` $category `:` `` `:` $key `)`\n              `:` type($ok) `,` type($value)\n              (`=` $default_value^)?\n              attr-dict-with-keyword\n</code></pre> <p>Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and if it was the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.</p> <p>This is roughly equivalent to the <code>sysconf</code> linux syscall (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent.</p> <p>Users of the op must check the <code>ok</code> result before using the value as what set of keys is available may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. 
Keys should be prefixed with <code>ex.</code> when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.</p> <p>Well-known keys:</p> <ul> <li> <p>hal.device.id :: {some id pattern}   Returns 1 if the device identifier matches the given pattern string.</p> </li> <li> <p>hal.executable.format :: {some format pattern}   Returns 1 if the given format is supported by the device loader.</p> </li> <li> <p>hal.device :: concurrency   The maximum concurrently executable submissions, mapping roughly to the   queue count. The actual concurrency available may be less than this based   on dynamic runtime parameters such as power/thermal modes, quota limits,   or user choice.</p> </li> <li> <p>hal.dispatch :: concurrency   The maximum concurrently executable workgroups for a particular dispatch.   The actual concurrency available may be less depending on device state.</p> </li> </ul> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>category</code>::mlir::StringAttrstring attribute <code>key</code>::mlir::StringAttrstring attribute <code>default_value</code>::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/HAL/#operands_36","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_22","title":"Results:","text":"Result Description <code>ok</code> 1-bit signless integer <code>value</code> any type"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuealloca-haldevicequeueallocaop","title":"<code>hal.device.queue.alloca</code> (HAL::DeviceQueueAllocaOp)","text":"<p>Allocates a queue-ordered transient buffer</p> <p>Syntax:</p> 
<pre><code>operation ::= `hal.device.queue.alloca` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `pool` `(` $pool `)`\n              `type` `(` $memory_types `)`\n              `usage` `(` $buffer_usage `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), $result_size)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a queue-ordered transient buffer that will be available for use when the signal fence is reached. The allocation will not be made until the wait fence has been reached.</p> <p>The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.</p> <p>The buffer handle will remain live so long as there are retainers but the contents are undefined before the allocation signal fence has been signaled and after the deallocation wait fence has been reached.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_37","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>pool</code> 64-bit signless integer <code>result_size</code> index"},{"location":"reference/mlir-dialects/HAL/#results_23","title":"Results:","text":"Result Description <code>result</code> 
buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuedealloca-haldevicequeuedeallocaop","title":"<code>hal.device.queue.dealloca</code> (HAL::DeviceQueueDeallocaOp)","text":"<p>Deallocates a queue-ordered transient buffer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.dealloca` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `buffer` `(` $buffer `:` type($buffer) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Deallocates a queue-ordered transient buffer. The deallocation will not be made until the wait fence has been reached and once the storage is available for reuse the signal fence will be signaled.</p> <p>After deallocation the contents of the buffer may still be accessible but will have undefined contents as other operations reuse the memory.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_38","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueexecute-haldevicequeueexecuteop","title":"<code>hal.device.queue.execute</code> (HAL::DeviceQueueExecuteOp)","text":"<p>Enqueues command buffer execution</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.execute` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              (`commands` `(` `[` $command_buffers^ `]` `)`)?\n              attr-dict-with-keyword\n</code></pre> <p>Executes one or more command buffers on a device queue. The command buffers are executed in order as if they were recorded as one. 
No commands will execute until the wait fence has been reached and the signal fence will be signaled when all commands have completed.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_39","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>command_buffers</code> variadic of command_buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueflush-haldevicequeueflushop","title":"<code>hal.device.queue.flush</code> (HAL::DeviceQueueFlushOp)","text":"<p>Flushes locally-pending submissions to the queue</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.flush` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              attr-dict-with-keyword\n</code></pre> <p>Flushes any locally-pending submissions in the queue. When submitting many queue operations this can be used to eagerly flush earlier submissions while later ones are still being constructed. 
This may be a no-op.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_40","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueread-haldevicequeuereadop","title":"<code>hal.device.queue.read</code> (HAL::DeviceQueueReadOp)","text":"<p>Reads a segment from a file into a device buffer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.read` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `source` `(` $source_file `:` type($source_file) `)`\n              `` `[` $source_offset `]`\n              `target` `(` $target_buffer `:` type($target_buffer) `)`\n              `` `[` $target_offset `]`\n              `length` `(` $length `)`\n              `flags` `(` $flags `)`\n              attr-dict-with-keyword\n</code></pre> <p>Enqueues a file read operation that streams a segment of the source file defined by the source offset and length into the target HAL buffer at the specified target offset. The queue affinity should be set to where the target buffer will be consumed. The source file must have read permission and the target buffer must have transfer-target usage. 
Read failure will result in propagated semaphore failure or device loss.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_41","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_file</code> buffer <code>source_offset</code> 64-bit signless integer <code>target_buffer</code> buffer <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuewrite-haldevicequeuewriteop","title":"<code>hal.device.queue.write</code> (HAL::DeviceQueueWriteOp)","text":"<p>Writes a segment from a device buffer into a file</p> <p>Syntax:</p> <pre><code>operation ::= `hal.device.queue.write` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `source` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `]`\n              `target` `(` $target_file `:` type($target_file) `)`\n              `` `[` $target_offset `]`\n              `length` `(` $length `)`\n              `flags` `(` $flags `)`\n              attr-dict-with-keyword\n</code></pre> <p>Enqueues a file write operation that streams a segment of the source HAL buffer defined by the source offset and length into the target file at the specified target offset. The queue affinity should be set to where the source buffer was produced. The source buffer must have transfer-source usage and the target file must have write permission. 
Write failure will result in propagated semaphore failure or device loss.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_42","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_buffer</code> buffer <code>source_offset</code> index <code>target_file</code> buffer <code>target_offset</code> 64-bit signless integer <code>length</code> index"},{"location":"reference/mlir-dialects/HAL/#halreturn-halreturnop","title":"<code>hal.return</code> (HAL::ReturnOp)","text":"<p>Return from a hal.* region</p> <p>Syntax:</p> <pre><code>operation ::= `hal.return` ($operands^ `:` type($operands))? attr-dict\n</code></pre> <p>Returns the given values from the region and back to the host code.</p> <p>Traits: <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_43","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/HAL/#executable-ops","title":"Executable ops","text":"<p>Ops for <code>!hal.executable</code> / <code>iree_hal_executable_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halexecutablebinary-halexecutablebinaryop","title":"<code>hal.executable.binary</code> (HAL::ExecutableBinaryOp)","text":"<p>Compiled executable binary data</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.binary` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n</code></pre> <p>A compiled executable binary with an optional nested module containing the IR prior to serialization (for debugging).</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableOp&gt;</code></p> 
<p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>format</code>::mlir::StringAttrstring attribute <code>data</code>::mlir::DenseIntElementsAttr8-bit signless integer elements attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablecalculate_workgroups-halexecutablecalculateworkgroupsop","title":"<code>hal.executable.calculate_workgroups</code> (HAL::ExecutableCalculateWorkgroupsOp)","text":"<p>Calculates workgroup count from workload for an exported function</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.calculate_workgroups` `device` `(` $device `:` type($device) `)`\n              `target` `(` $entry_point `)`\n              (`workload` `(` `[` $workload^ `]` `)`)?\n              `:` type($workgroup_x) `,` type($workgroup_y) `,` type($workgroup_z)\n              attr-dict-with-keyword\n</code></pre> <p>Calculates the workgroup count (grid XYZ) based on the given workload using the workgroup count calculation region of the target <code>hal.executable.export</code> op.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_44","title":"Operands:","text":"Operand Description <code>device</code> device <code>workload</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_24","title":"Results:","text":"Result Description 
<code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index"},{"location":"reference/mlir-dialects/HAL/#halexecutablecondition-halexecutableconditionop","title":"<code>hal.executable.condition</code> (HAL::ExecutableConditionOp)","text":"<p>Host code to determine if the executable is enabled</p> <p>Variants are selected based on their target and this optional condition op that returns true if the variant is valid for use on the provided runtime <code>!hal.device</code>. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.</p> <p>Traits: <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantblock-halexecutableconstantblockop","title":"<code>hal.executable.constant.block</code> (HAL::ExecutableConstantBlockOp)","text":"<p>Executable constant block initializer</p> <p>Initializes one or more constants in the executable constant block by returning one value per identified constant. Each constant block is evaluated on the host prior to instantiating the executable for a given device and allows for the executable to be specialized based on device capabilities and limits.</p> <p>The keys specified are unique per variant and will be deduplicated across multiple constant blocks when present. 
They are only used during lowering and will not survive to runtime so they need only have descriptive enough names to avoid collisions and represent the semantics of the value.</p> <p>Constant values can be loaded in the device code with the <code>hal.executable.constant.load</code> op:</p> <pre><code>hal.executable.variant public @target {\n  hal.executable.constant.block(%device: !hal.device) -&gt; (i32, i32) as (\"foo\", \"bar\") {\n    %0 = hal.device.query&lt;%device&gt; key(\"some.device.prop\")...\n    %1 = hal.device.query&lt;%device&gt; key(\"another.device.prop\")...\n    hal.return %0, %1 : i32, i32\n  }\n  builtin.module {\n    func @dispatch0() {\n      %0 = hal.executable.constant.load \"foo\" : i32\n      %1 = hal.executable.constant.load \"bar\" : i32\n      return\n    }\n  }\n}\n</code></pre> <p>Each target backend will implement the constant initialization and access in a way compatible with its execution model. Examples: - CPU: read-only buffer initialized on load and passed to each dispatch - CUDA: read-only buffer initialized on load and passed to each dispatch - SPIR-V: specialization constants - Metal: function constants - WebGPU: pipeline-overridable constants</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableSourceOp, IREE::HAL::ExecutableVariantOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>keys</code>::mlir::ArrayAttrarray attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantload-halexecutableconstantloadop","title":"<code>hal.executable.constant.load</code> 
(HAL::ExecutableConstantLoadOp)","text":"<p>Loads a constant value from the executable constant block</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.constant.load` $key attr-dict `:` type($result)\n</code></pre> <p>Loads a scalar constant value from the static executable constant block. The value provided by a constant block with the given key will be loaded and bitcast (possibly with truncation or zero-extension) to the result type.</p> <p>Note that backends are allowed to implement their own mechanisms for referencing constant block values and this is provided only as a default for those not needing special behavior.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#results_25","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halexecutablecreate-halexecutablecreateop","title":"<code>hal.executable.create</code> (HAL::ExecutableCreateOp)","text":"<p>Creates an executable</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.create` `device` `(` $device `:` type($device) `)`\n              `target` `(` $executable_target `)`\n              `layouts` `(` `[` $layouts `]` `)`\n              (`constants` `(` `[` $constants^ `]` `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a target-dependent executable cached on the provided device. 
Entry points contained within the executable can be dispatched using the resulting executable handle.</p> <p>Depending on the driver creation may take a non-trivial amount of time (such as when JITing/etc). As the cache is internally synchronized callers can issue preparation requests from multiple threads - even for the same executables - and calls will block until preparation completes.</p> <p>Optional constants provide for specialization of the executable based on runtime-derived parameters.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable_target</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_45","title":"Operands:","text":"Operand Description <code>device</code> device <code>layouts</code> variadic of pipeline_layout <code>constants</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_26","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable_end-halexecutableendop","title":"<code>hal.executable_end</code> (HAL::ExecutableEndOp)","text":"<p>Terminator pseudo-op for the executable op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#halexecutableexport-halexecutableexportop","title":"<code>hal.executable.export</code> (HAL::ExecutableExportOp)","text":"<p>Executable entry point declaration</p> <p>An entry point exported by the 
executable with statically-available information describing the IO interface it uses and other dispatch metadata.</p> <p>The <code>workgroup_count</code> region represents the computation that returns the number of workgroups to use in the 3D grid dispatch. The arguments to the region represent the workload as captured by each dispatch. It returns the number of workgroups along x, y, and z.</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableSourceOp, IREE::HAL::ExecutableVariantOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrsize_t <code>layout</code>::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification <code>workgroup_size</code>::mlir::ArrayAttrindex array attribute <code>subgroup_size</code>::mlir::IntegerAttrsize_t <code>workgroup_local_memory</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablelookup-halexecutablelookupop","title":"<code>hal.executable.lookup</code> (HAL::ExecutableLookupOp)","text":"<p>Executable cache lookup pseudo-op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.lookup` `device` `(` $device `:` type($device) `)`\n              `executable` `(` $executable `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_46","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_27","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable-halexecutableop","title":"<code>hal.executable</code> (HAL::ExecutableOp)","text":"<p>Target-specific executable module</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>An executable module representing a target-specific compiled kernel/shader/etc.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::HAL::ExecutableEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code>, <code>Util_ObjectLike</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource_end-halexecutablesourceendop","title":"<code>hal.executable.source_end</code> (HAL::ExecutableSourceEndOp)","text":"<p>Terminator pseudo-op for the executable source op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.source_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableSourceOp&gt;</code>, 
<code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource-halexecutablesourceop","title":"<code>hal.executable.source</code> (HAL::ExecutableSourceOp)","text":"<p>Generic source contents of an executable op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.source` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              ``\n              $body\n</code></pre> <p>This is an unspecialized source representation of an executable module without an assigned target. This is useful for hand-authoring executables prior to device specification.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::HAL::ExecutableSourceEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>objects</code>::mlir::iree_compiler::IREE::HAL::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant_end-halexecutablevariantendop","title":"<code>hal.executable.variant_end</code> (HAL::ExecutableVariantEndOp)","text":"<p>Terminator pseudo-op for the executable variant op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.variant_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableVariantOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant-halexecutablevariantop","title":"<code>hal.executable.variant</code> (HAL::ExecutableVariantOp)","text":"<p>Target-specific variant of an executable op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.executable.variant` 
custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              `target` `(` $target `)`\n              (`objects` `(` $objects^ `)` )?\n              attr-dict-with-keyword\n              $body\n</code></pre> <p>The target IR for the executable. This can be preserved for debugging but is usually removed during transformation.</p> <p>Variants are selected based on their target and an optional condition op that returns true if the variant is valid for use on the provided runtime <code>!hal.device</code>. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.</p> <p>Traits: <code>HasParent&lt;IREE::HAL::ExecutableOp&gt;</code>, <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::HAL::ExecutableVariantEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>target</code>::mlir::iree_compiler::IREE::HAL::ExecutableTargetAttrgeneric executable target specification <code>objects</code>::mlir::ArrayAttrHAL executable object references"},{"location":"reference/mlir-dialects/HAL/#experimental-ops","title":"Experimental ops","text":"<p>Temporary hack ops expected to be removed in the future.</p>"},{"location":"reference/mlir-dialects/HAL/#halexfilefrom_memory-halexfilefrommemoryop","title":"<code>hal.ex.file.from_memory</code> (HAL::ExFileFromMemoryOp)","text":"<p>Creates a file mapped into a byte range of a host buffer</p> <p>Syntax:</p> <pre><code>operation ::= `hal.ex.file.from_memory` `device` `(` $device `:` type($device) `)`\n              `affinity` `(` $queue_affinity `)`\n              `access` `(` $access 
`)`\n              `buffer` `(` $buffer `:` type($buffer) `)`\n              `` `[` $offset `for` $length `]`\n              `flags` `(` $flags `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a file handle that is backed by the given <code>buffer</code> contents. Behavior is undefined if the buffer contents change while the accesses are in-flight.</p> <p>Experimental as the exact interface for getting files from module contents still needs iteration. Most hardware APIs require a file descriptor or native platform handle but here we only have host pointers. When memory-mapped some systems allow for retrieval of the platform handle from a virtual address (GetMappedFileNameA/posix_mem_offset) but the APIs are sketchy and likely slow. Instead we should probably have a way to query for a file handle derived from the calling module by stack-walking and asking the VM module for its handle. Until we can figure this out this method will be marked experimental.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>access</code>mlir::iree_compiler::IREE::HAL::MemoryAccessBitfieldAttrvalid MemoryAccess"},{"location":"reference/mlir-dialects/HAL/#operands_47","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>buffer</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index <code>flags</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_28","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HAL/#fence-ops","title":"Fence ops","text":"<p>Ops for <code>!hal.fence</code> / 
<code>iree_hal_fence_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halfenceawait-halfenceawaitop","title":"<code>hal.fence.await</code> (HAL::FenceAwaitOp)","text":"<p>Asynchronous fence wait operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.await` `until` `(` `[` $fences `]` `)`\n              `timeout_millis` `(` $timeout_millis `)`\n              `:` type($status)\n              attr-dict-with-keyword\n</code></pre> <p>Yields the caller until all fences are reached. Returns the <code>status</code> of the fence after the wait, with a non-zero value indicating failure.</p> <p>Traits: <code>Util_YieldPoint</code></p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_48","title":"Operands:","text":"Operand Description <code>timeout_millis</code> 32-bit signless integer <code>fences</code> variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_29","title":"Results:","text":"Result Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencecreate-halfencecreateop","title":"<code>hal.fence.create</code> (HAL::FenceCreateOp)","text":"<p>Creates an unsignaled fence</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.create` `device` `(` $device `:` type($device) `)`\n              `flags` `(` $flags `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a fence that defines a point in time. 
By default fences will remain unsignaled unless they are explicitly signaled with <code>hal.fence.signal</code> or asynchronously signaled by the device by passing them as an operand to queue submission ops.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>flags</code>mlir::iree_compiler::IREE::HAL::FenceFlagBitfieldAttrvalid FenceFlag"},{"location":"reference/mlir-dialects/HAL/#operands_49","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_30","title":"Results:","text":"Result Description <code>result</code> fence"},{"location":"reference/mlir-dialects/HAL/#halfencefail-halfencefailop","title":"<code>hal.fence.fail</code> (HAL::FenceFailOp)","text":"<p>Fence failure operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.fail` `&lt;` $fence `:` type($fence) `&gt;`\n              `status` `(` $status `)`\n              attr-dict-with-keyword\n</code></pre> <p>Signals the fence with a failure. 
The <code>status</code> will be returned from each timepoint semaphore's <code>hal.semaphore.query</code> and <code>hal.semaphore.signal</code> for the lifetime of each semaphore.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_50","title":"Operands:","text":"Operand Description <code>fence</code> fence <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencejoin-halfencejoinop","title":"<code>hal.fence.join</code> (HAL::FenceJoinOp)","text":"<p>Creates a fence from the given timepoints</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.join` `at` `(` `[` $fences `]` `)`\n              `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a fence that joins the input fences as a wait-all operation.</p> <p>Interfaces: <code>OpAsmOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_51","title":"Operands:","text":"Operand Description <code>fences</code> variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_31","title":"Results:","text":"Result Description <code>result</code> fence"},{"location":"reference/mlir-dialects/HAL/#halfencequery-halfencequeryop","title":"<code>hal.fence.query</code> (HAL::FenceQueryOp)","text":"<p>Fence query operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.query` `&lt;` $fence `:` type($fence) `&gt;`\n              `:` type($status)\n              attr-dict-with-keyword\n</code></pre> <p>Queries whether the fence has been reached and its status. 
Returns OK if the fence has been signaled successfully, DEFERRED if it is unsignaled, and otherwise an error indicating the failure.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_52","title":"Operands:","text":"Operand Description <code>fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#results_32","title":"Results:","text":"Result Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencesignal-halfencesignalop","title":"<code>hal.fence.signal</code> (HAL::FenceSignalOp)","text":"<p>Fence signal operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal.fence.signal` `&lt;` $fence `:` type($fence) `&gt;`\n              attr-dict-with-keyword\n</code></pre> <p>Signals the fence to indicate that the timepoints contained have been reached. Waiting work may begin immediately.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_53","title":"Operands:","text":"Operand Description <code>fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#instrument-ops","title":"Instrument ops","text":"<p>Ops for <code>!hal.instrument.*</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemoryload-halinstrumentmemoryloadop","title":"<code>hal.instrument.memory.load</code> (HAL::InstrumentMemoryLoadOp)","text":"<p>Emits a memory load instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.memory.load` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $base `[` $indices `]` `,` $loadValue\n              attr-dict `:` type($base) `,` type($result)\n</code></pre> <p>Emits a workgroup-specific memory load event indicating that a number of bytes from the given resolved pointer have been loaded by the workgroup.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_54","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>loadValue</code> any type <code>base</code> memref of any type values <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_33","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemorystore-halinstrumentmemorystoreop","title":"<code>hal.instrument.memory.store</code> (HAL::InstrumentMemoryStoreOp)","text":"<p>Emits a memory store instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.memory.store` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $base `[` $indices `]` `,` $storeValue\n              attr-dict `:` type($base) `,` type($result)\n</code></pre> <p>Emits a workgroup-specific memory store event indicating that a number of bytes have been stored to the given resolved pointer by the workgroup.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_55","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>storeValue</code> any type <code>base</code> memref of any type values <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_34","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentprint-halinstrumentprintop","title":"<code>hal.instrument.print</code> (HAL::InstrumentPrintOp)","text":"<p>Emits a human-readable printf-style string event</p> <p>Syntax:</p> 
<pre><code>operation ::= `hal.instrument.print` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $format (`*` `(` $values^ `:` type($values) `)`)?\n              attr-dict\n</code></pre> <p>Formats a string using a limited subset of printf format specifiers and the provided values and then emits an <code>iree_instrument_dispatch_print_t</code> event. Final formatted string lengths may be limited to as much as 1024 characters and should be kept as small as possible to avoid easily exceeding the instrumentation storage buffers with redundant strings.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>format</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_56","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>values</code> variadic of any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentvalue-halinstrumentvalueop","title":"<code>hal.instrument.value</code> (HAL::InstrumentValueOp)","text":"<p>Emits a scalar value instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.value` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n              $ordinal `=` $operand attr-dict `:` type($operand)\n</code></pre> <p>Emits a workgroup-specific typed value with the given workgroup-relative ordinal.</p> <p>This op will be preserved even if the output is not used as it is only for debugging purposes.</p>"},{"location":"reference/mlir-dialects/HAL/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>ordinal</code>::mlir::IntegerAttr8-bit integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_57","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>workgroupKey</code> index <code>operand</code> any 
type"},{"location":"reference/mlir-dialects/HAL/#results_35","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentworkgroup-halinstrumentworkgroupop","title":"<code>hal.instrument.workgroup</code> (HAL::InstrumentWorkgroupOp)","text":"<p>Emits a dispatch workgroup instrumentation event</p> <p>Syntax:</p> <pre><code>operation ::= `hal.instrument.workgroup` `` `[` $buffer `:` type($buffer) `]`\n              `dispatch` `(` $dispatchId `)`\n              attr-dict `:` type($workgroupKey)\n</code></pre> <p>Emits an <code>iree_instrument_dispatch_workgroup_t</code> event into the instrumentation stream. The workgroup event identifies the unique dispatch, its workgroup count, and the ID of the emitting workgroup within the dispatch. Optionally targets that support querying the processor ID executing the workgroup can attach that information for tracking purposes.</p> <p>On targets such as CPUs where entire workgroups execute as atomic units only one workgroup event should be emitted. 
On targets such as GPUs where there may be multiple invocations executing as part of a single workgroup only the first invocation within the workgroup should emit the workgroup event (by checking if the LocalInvocationIndex or threadIdx == 0, etc).</p> <p>The resulting workgroup key is used by subsequent workgroup-specific instrumentation events.</p>"},{"location":"reference/mlir-dialects/HAL/#operands_58","title":"Operands:","text":"Operand Description <code>buffer</code> memref of any type values <code>dispatchId</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_36","title":"Results:","text":"Result Description <code>workgroupKey</code> index"},{"location":"reference/mlir-dialects/HAL/#interface-ops","title":"Interface ops","text":"<p>Ops for <code>!hal.interface.*</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halinterfacebindingsubspan-halinterfacebindingsubspanop","title":"<code>hal.interface.binding.subspan</code> (HAL::InterfaceBindingSubspanOp)","text":"<p>Returns an alias to a subspan of interface binding data</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.binding.subspan` `set` `(` $set `)`\n              `binding` `(` $binding `)`\n              `type` `(` custom&lt;DescriptorType&gt;($descriptor_type) `)`\n              (`alignment` `(` $alignment^ `)`)?\n              (`offset` `(` $byte_offset^ `)`)?\n              (`flags` `(` $descriptor_flags^ `)`)?\n              attr-dict `:` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Returns a subspan of an interface binding storage buffer in a generic type. The exact shape, type, and alignment of the returned type are defined by the result type (tensor, memref, etc).</p> <p>An optional alignment indicates the byte alignment of the base binding resource. 
Note that the byte offset is added to the base and the alignment will be the minimum of the two.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>set</code>::mlir::IntegerAttrindex attribute <code>binding</code>::mlir::IntegerAttrindex attribute <code>descriptor_type</code>::mlir::iree_compiler::IREE::HAL::DescriptorTypeAttrvalid DescriptorType <code>alignment</code>::mlir::IntegerAttrindex attribute <code>descriptor_flags</code>::mlir::iree_compiler::IREE::HAL::DescriptorFlagsAttrvalid Descriptor flags"},{"location":"reference/mlir-dialects/HAL/#operands_59","title":"Operands:","text":"Operand Description <code>byte_offset</code> index <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_37","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceconstantload-halinterfaceconstantloadop","title":"<code>hal.interface.constant.load</code> (HAL::InterfaceConstantLoadOp)","text":"<p>Loads a constant value from the interface constant block</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.constant.load` `` `[` $index `]`\n              (`alignment` `(` $alignment^ `)`)?\n              (`values` `(` $values^ `)`)?\n              attr-dict `:` type($result)\n</code></pre> <p>Loads a scalar constant value from an executable IO push constant block. 
The value will be loaded from the given constant offset and will be bitcast (possibly with truncation or zero-extension) to the result type.</p> <p>An optional alignment indicates the byte alignment of potential values for the constant when it could be determined from analysis. If omitted the value may be anything and its interpretation is up to the usage. This is intended to provide pointer alignment-like semantics to constants that are used to index into binding resources.</p> <p>An optional set of values indicates all possible values that can be passed to the constant from all dispatch sites in the program. If omitted the value may be from an unanalyzable source (outside of the program, indirect, etc) and must be assumed to have any value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrsize_t <code>alignment</code>::mlir::IntegerAttrindex attribute <code>values</code>::mlir::ArrayAttrarray attribute"},{"location":"reference/mlir-dialects/HAL/#results_38","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupcount-halinterfaceworkgroupcountop","title":"<code>hal.interface.workgroup.count</code> (HAL::InterfaceWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid. 
Matches what was passed to the <code>hal.command_buffer.dispatch</code> command (or what was indirectly specified).</p> <p>Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable.</p> <pre><code>%x = hal.interface.workgroup.count[0] : index\n%y = hal.interface.workgroup.count[1] : index\n%z = hal.interface.workgroup.count[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_39","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupid-halinterfaceworkgroupidop","title":"<code>hal.interface.workgroup.id</code> (HAL::InterfaceWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current tile in the range of <code>[0, hal.interface.workgroup.count)</code> along each XYZ dimension.</p> <p>Corresponds to the <code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable.</p> <pre><code>%x = hal.interface.workgroup.id[0] : index\n%y = hal.interface.workgroup.id[1] : index\n%z = hal.interface.workgroup.id[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_40","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupsize-halinterfaceworkgroupsizeop","title":"<code>hal.interface.workgroup.size</code> (HAL::InterfaceWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `hal.interface.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable.</p> <pre><code>%x = hal.interface.workgroup.size[0] : index\n%y = hal.interface.workgroup.size[1] : index\n%z = hal.interface.workgroup.size[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_41","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HAL/#pipeline-layout-ops","title":"Pipeline layout ops","text":"<p>Ops for <code>!hal.pipeline_layout</code> / 
<code>iree_hal_pipeline_layout_t</code>.</p>"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutcreate-halpipelinelayoutcreateop","title":"<code>hal.pipeline_layout.create</code> (HAL::PipelineLayoutCreateOp)","text":"<p>Creates a pipeline layout</p> <p>Syntax:</p> <pre><code>operation ::= `hal.pipeline_layout.create` `device` `(` $device `:` type($device) `)`\n              `push_constants` `(` $push_constants `)`\n              `layouts` `(` `[` $set_layouts `]` `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a pipeline layout from the given descriptor sets and push constant required size. Pipeline layouts can be shared across any executable that uses the same layout and push constant information. Sharing the layout between executables will reduce runtime binding overhead and it is often worth the cost to allow a small number of unused bindings in one executable such that it can share layouts with others that will be scheduled adjacent to it.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>push_constants</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_60","title":"Operands:","text":"Operand Description <code>device</code> device <code>set_layouts</code> variadic of descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#results_42","title":"Results:","text":"Result Description <code>result</code> pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutlookup-halpipelinelayoutlookupop","title":"<code>hal.pipeline_layout.lookup</code> (HAL::PipelineLayoutLookupOp)","text":"<p>Pipeline layout 
cache lookup pseudo-op</p> <p>Syntax:</p> <pre><code>operation ::= `hal.pipeline_layout.lookup` `device` `(` $device `:` type($device) `)`\n              `layout` `(` $layout `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized pipeline layout.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>layout</code>::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification"},{"location":"reference/mlir-dialects/HAL/#operands_61","title":"Operands:","text":"Operand Description <code>device</code> device"},{"location":"reference/mlir-dialects/HAL/#results_43","title":"Results:","text":"Result Description <code>result</code> pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#pseudo-ops","title":"Pseudo Ops","text":"<p>Pseudo ops for conversion support.</p>"},{"location":"reference/mlir-dialects/HAL/#haldispatchextern-haldispatchexternop","title":"<code>hal.dispatch.extern</code> (HAL::DispatchExternOp)","text":"<p>A dispatch of workgroups across a 3-dimensional grid</p> <p>Syntax:</p> <pre><code>operation ::= `hal.dispatch.extern` $export\n              (`[` $workload^ `]`)? 
``\n              `(` $arguments `)` `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n              `count` `` custom&lt;WorkgroupCountRegion&gt;($workgroup_count)\n              `layout` `(` $layout `)`\n              (`bindings` `(` $bindings^ `)`)?\n              `objects` `(` `{` custom&lt;TargetConditionObjects&gt;($targets,\n              $target_ordinals,\n              $target_objects,\n              $target_regions) `}` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches some number of workgroups across a 3-dimensional grid using a function defined externally in one or more referenced objects. Objects are declared per executable target and selected automatically during linking based on where the dispatch is used. Semantically this is equivalent to a <code>flow.dispatch.workgroups</code> but with the workgroup region invisible to the compiler. See <code>hal.executable</code> for more information about object linkage.</p> <p>Note that since this happens at tensor level the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors. 
Codegen guarantees this behavior but when sourcing externally authored dispatch functions it's critical that this behavior is observed.</p> <p>Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the platform-dependent primitives for accessing workgroup ID, size, and count intrinsics instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc). To aid deduplication code producing these external dispatches should try not to specialize early for particular shapes and instead emit the most generic code possible as having 500 slightly different <code>hal.dispatch.extern</code> ops pointing at the same object file is likely to require 500 copies of the object instead of 500 calls to the same object.</p> <p>Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it. If workgroup size or distribution varies based on the target device a <code>!hal.device</code> argument can be used by the workgroup count calculation region to factor in device parameters. 
See <code>hal.device.query</code> for more information on how to query information.</p> <pre><code>%r = hal.dispatch.extern \"some_function\"[%c5, %c5](%0, %1)\n    : (tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt;\n  ...\n</code></pre> <p>The number of results of the operation is equal to the number of results in the type signature (<code>(tensor&lt;5x5xf32&gt;, tensor&lt;5xf32&gt;) -&gt; tensor&lt;5x5xf32&gt;</code>). Each tensor argument and result in the type signature has a corresponding pipeline layout slot and must be declared. If multiple arguments or results share the same layout slot they can be aliased using the <code>bindings</code> attribute and otherwise each is assumed unique.</p> <p>There are no <code>arguments</code> operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, <code>-&gt; %0</code> would tie the first argument to the result. In that case, there would be no separate block argument for the result.</p> <p>Objects for multiple targets can be specified and the ones used are selected based on their target and an optional condition region that returns true if the variant is valid for use on the provided runtime <code>!hal.device</code>. If no variants within an executable are valid then loading will fail at runtime. 
If multiple variants are valid the first valid one found will be loaded and used for execution.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>export</code>::mlir::StringAttrstring attribute <code>layout</code>::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification <code>targets</code>::mlir::ArrayAttrarray attribute <code>target_ordinals</code>::mlir::ArrayAttrArray of index ordinal attributes <code>target_objects</code>::mlir::ArrayAttrarray attribute <code>workgroup_size</code>::mlir::ArrayAttrindex array attribute <code>subgroup_size</code>::mlir::IntegerAttrsize_t <code>workgroup_local_memory</code>::mlir::IntegerAttrindex attribute <code>bindings</code>::mlir::ArrayAttrHAL binding array attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_62","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_44","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/HAL/#haltensorbarrier-haltensorbarrierop","title":"<code>hal.tensor.barrier</code> (HAL::TensorBarrierOp)","text":"<p>Signals a fence when all tensors are available</p> <p>Syntax:</p> <pre><code>operation ::= `hal.tensor.barrier` `join` `` `(` $sources `:` type($sources) 
`)`\n              `=` `` `&gt;`\n              $signal_fence `:` type($signal_fence)\n              attr-dict-with-keyword\n</code></pre> <p>Defines a barrier that is used to indicate availability of an entire set of tensors by signaling a fence. The source tensors are returned for chaining.</p> <p>Interfaces: <code>TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/HAL/#operands_63","title":"Operands:","text":"Operand Description <code>sources</code> variadic of tensor of any type values <code>signal_fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#results_45","title":"Results:","text":"Result Description <code>results</code> variadic of tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#haltensorexport-haltensorexportop","title":"<code>hal.tensor.export</code> (HAL::TensorExportOp)","text":"<p>Exports a tensor to a HAL buffer view</p> <p>Syntax:</p> <pre><code>operation ::= `hal.tensor.export` $source\n              ($name^)?\n              (`into` `(` $target_storage^ `:` type($target_storage) `)`)?\n              `:`\n              custom&lt;TypeAlias&gt;($source_encoding, type($source)) (`{` $source_dims^ `}`)?\n              `-&gt;`\n              type($target)\n              attr-dict\n</code></pre> <p>Defines an export of an SSA-form tensor to an external HAL buffer view.</p> <p>The provided <code>source_encoding</code>, if different from the <code>source</code> type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.</p> <p>An optional <code>target_storage</code> buffer can be provided to hold the exported result. The export will fail at runtime if the storage is null or if it has insufficient capacity to store the output. 
The storage must be device-visible and defined for transfer-target and dispatch usage.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_64","title":"Operands:","text":"Operand Description <code>source</code> tensor of any type values <code>source_dims</code> variadic of index <code>target_storage</code> buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_46","title":"Results:","text":"Result Description <code>target</code> buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#haltensorimport-haltensorimportop","title":"<code>hal.tensor.import</code> (HAL::TensorImportOp)","text":"<p>Imports a tensor from a HAL buffer view</p> <p>Syntax:</p> <pre><code>operation ::= `hal.tensor.import` (`wait` `(` $wait_fence^ `)` `=` `` `&gt;`)?\n              $source\n              ($name^)?\n              `:` type($source) `-&gt;`\n              custom&lt;TypeAlias&gt;($target_encoding, type($target)) (`{` $target_dims^ `}`)?\n              attr-dict\n</code></pre> <p>Defines an import of an external HAL buffer view into a SSA-form tensor. An optional semaphore timepoint can be specified indicating when the buffer view is available for use. 
If no semaphore timepoint is provided it is assumed the buffer view is immediately available.</p> <p>The provided <code>target_encoding</code>, if different from the <code>target</code> type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HAL/#attributes_48","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute <code>name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_65","title":"Operands:","text":"Operand Description <code>source</code> buffer or buffer_view <code>target_dims</code> variadic of index <code>wait_fence</code> fence"},{"location":"reference/mlir-dialects/HAL/#results_47","title":"Results:","text":"Result Description <code>target</code> tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#attributes_49","title":"Attributes","text":""},{"location":"reference/mlir-dialects/HAL/#affinityqueueattr","title":"AffinityQueueAttr","text":"<p>specifies a set of allowed queues for an operation</p> <p>WIP; see #10765. This may change in the future to either be a nested attribute on a larger affinity struct or be defined by an implementation of the affinity attr interface. 
For now this allows higher levels of the stack to specify queues such that the stream dialect can understand them and they can be lowered into the HAL dialect.</p> <p>Specifies that an annotated operation or scope is only allowed to execute on the set of queues (0-64) provided. Operations will not run on other queues.</p> <p>Example: <pre><code>// any queue\n#hal.affinity.queue&lt;*&gt;\n// queues 4 and 5\n#hal.affinity.queue&lt;[4, 5]&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters","title":"Parameters:","text":"Parameter C++ type Description mask <code>int64_t</code>"},{"location":"reference/mlir-dialects/HAL/#collectiveattr","title":"CollectiveAttr","text":"<p>collective operation and specification</p> <p>Syntax:</p> <pre><code>#hal.collective&lt;\n  CollectiveKind,   # kind\n  std::optional&lt;CollectiveReductionOp&gt;,   # reduction\n  CollectiveElementType   # element_type\n&gt;\n</code></pre> <p>Specifies the collective operation to perform and any mode bits required.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description kind <code>CollectiveKind</code> reduction <code>std::optional&lt;CollectiveReductionOp&gt;</code> element_type <code>CollectiveElementType</code>"},{"location":"reference/mlir-dialects/HAL/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"<p>descriptor set binding specification</p> <p>Syntax:</p> <pre><code>#hal.descriptor_set.binding&lt;\n  int64_t,   # ordinal\n  DescriptorType,   # type\n  std::optional&lt;DescriptorFlags&gt;   # flags\n&gt;\n</code></pre> <p>Specifies a single binding within a descriptor set layout.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> type <code>DescriptorType</code> flags 
<code>std::optional&lt;DescriptorFlags&gt;</code>"},{"location":"reference/mlir-dialects/HAL/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"<p>descriptor set layout specification</p> <p>Syntax:</p> <pre><code>#hal.descriptor_set.layout&lt;\n  int64_t,   # ordinal\n  ::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;,   # bindings\n  std::optional&lt;DescriptorSetLayoutFlags&gt;   # flags\n&gt;\n</code></pre> <p>Specifies the layout information of a single set of descriptors used within a pipeline layout. Multiple of these sets may be used by a single entry point to allow for bindings with similar update frequencies to be grouped.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> bindings <code>::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;</code> flags <code>std::optional&lt;DescriptorSetLayoutFlags&gt;</code>"},{"location":"reference/mlir-dialects/HAL/#descriptortypeattr","title":"DescriptorTypeAttr","text":"<p>valid DescriptorType</p> <p>Syntax:</p> <pre><code>#hal.descriptor_type&lt;\n  ::mlir::iree_compiler::IREE::HAL::DescriptorType   # value\n&gt;\n</code></pre> <p>Enum cases: * uniform_buffer (<code>UniformBuffer</code>) * storage_buffer (<code>StorageBuffer</code>)</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description value <code>::mlir::iree_compiler::IREE::HAL::DescriptorType</code> an enum of type DescriptorType"},{"location":"reference/mlir-dialects/HAL/#devicetargetattr","title":"DeviceTargetAttr","text":"<p>generic device target specification</p> <p>Specifies the properties of a target runtime device. Target devices are specified with a canonical identifier matching those used by the runtime (such as <code>cpu</code>, <code>vulkan</code>, etc). Target devices may support several target executable formats specified with <code>#hal.executable.target</code>. 
An optional configuration dictionary allows for overriding backend defaults.</p> <p>Example: <pre><code>#hal.device.target&lt;\"llvm-cpu\", {\n  executable_targets = [\n    #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-arm_32\"&gt;,\n    #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-arm_64\"&gt;,\n  ]\n}&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description deviceID <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/HAL/#executableobjectattr","title":"ExecutableObjectAttr","text":"<p>object file reference</p> <p>Defines an object file that can be linked into executables. Today this is only supported for external file references with paths the compiler can successfully resolve from its current working directory. Inlined data can optionally be provided to avoid the need for file system access and ensure the data source is attached to the IR as it makes its way through multiple compiler stages or reproducers.</p> <p>Future revisions may change this to an interface that allows both internal and external resources to define the object contents. Linking needs to be updated to support various object compositions and certain backends may require additional infrastructure support.</p> <p>In the long term the goal is to allow combinations of declared objects and generated code in order to give control of linking behavior to frontends. 
Instead of needing global command line flags to link in additional blobs the frontend can emit executables with the dependencies already defined per variant without needing to reach into the IREE compiler code.</p> <p>Example: <pre><code>#hal.executable.object&lt;{path = \"some/file.obj\"}&gt;\n#hal.executable.object&lt;{\n  path = \"some/embedded/file.obj\",\n  data = dense&lt;[...]&gt; : vector&lt;2048xi8&gt;\n}&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description path <code>StringAttr</code> data <code>DenseIntElementsAttr</code>"},{"location":"reference/mlir-dialects/HAL/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"<p>target-specific object file references</p> <p>A dictionary mapping executable target specifications to a list of objects. This is used to allow layers of the stack that support multi-targeting to specify information used during lowering into each particular target.</p> <p>The key attributes are matched against each target variant based on the backend and format as well as any configuration data provided. When comparing the configuration only fields present in both the key and target variant will be checked and must match. 
This allows specification of generic sets (\"all x86_64 targets get these objects\") as well as specific ones (\"only x86_64 targets with vector_size = 64 get these objects\").</p> <p>Example: <pre><code>#hal.executable.objects&lt;{\n  #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-arm_64\"&gt; = [\n    #hal.executable.object&lt;{path = \"some/file_arm_64.obj\"}&gt;\n  ],\n  #hal.executable.target&lt;\"llvm-cpu\", \"embedded-elf-x86_64\"&gt; = [\n    #hal.executable.object&lt;{path = \"some/file_x86_64.obj\"}&gt;\n  ]\n}&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description targets <code>ArrayAttr</code> targetObjects <code>ArrayAttr</code>"},{"location":"reference/mlir-dialects/HAL/#executabletargetattr","title":"ExecutableTargetAttr","text":"<p>generic executable target specification</p> <p>Specifies how to compile an executable for a specific target backend. A backend is used to translate and serialize the executable into the final form passed to the runtime. The format of the executable is a target-specific value indicating the required runtime support to load the deployed artifact. An optionally provided configuration dictionary overrides backend-specific defaults.</p> <p>Example: <pre><code>  // Produce a system-native ELF for x86-64 systems using the LLVM backend:\n  #hal.executable.target&lt;\"llvm-cpu\", \"system-elf-x86_64\", {\n    triple = \"x86_64-unknown-linux-elf\",\n    cpu = \"host\",\n    cpu_features = \"host\",\n    abi = \"lp32\",\n    ...\n  }&gt;\n</code></pre></p> <p>The same compilation backend may be used to translate executables for several different runtime devices. Likewise the same runtime device may use one of many different executable targets. 
Assume an N:M mapping between the two in all cases.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description backend <code>StringAttr</code> format <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/HAL/#interfacebindingattr","title":"InterfaceBindingAttr","text":"<p>interface binding specification</p> <p>Syntax:</p> <pre><code>#hal.interface.binding&lt;\n  int64_t,   # set\n  int64_t   # binding\n&gt;\n</code></pre> <p>Specifies the descriptor set and binding ordinal of a particular layout binding.</p> <p>Example: <pre><code>#hal.interface.binding&lt;0, 1&gt;\n</code></pre></p>"},{"location":"reference/mlir-dialects/HAL/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description set <code>int64_t</code> binding <code>int64_t</code>"},{"location":"reference/mlir-dialects/HAL/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"<p>executable entry point layout specification</p> <p>Syntax:</p> <pre><code>#hal.pipeline.layout&lt;\n  int64_t,   # pushConstants\n  ::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;   # setLayouts\n&gt;\n</code></pre> <p>Specifies the layout information used for interacting with executable functions. 
This allows host code to correctly map parameters to the lower-level target-specific argument passing behavior.</p>"},{"location":"reference/mlir-dialects/HAL/#parameters_10","title":"Parameters:","text":"Parameter C++ type Description pushConstants <code>int64_t</code> setLayouts <code>::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;</code>"},{"location":"reference/mlir-dialects/HAL/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/HAL/#allocator","title":"allocator","text":"<p>Allocates buffers for a particular device memory space.</p>"},{"location":"reference/mlir-dialects/HAL/#buffer","title":"buffer","text":"<p>A memory buffer with a specific memory_type that is used to describe the capabilities and behavior of the backing memory of the buffer. Buffers may be any mix of host-accessible, host-coherent, or device-accessible for various usages. Depending on these memory types the buffers may be mapped for access on the host as memory though certain restrictions may be imposed.</p>"},{"location":"reference/mlir-dialects/HAL/#buffer_view","title":"buffer_view","text":"<p>A shaped and typed buffer reference. This just wraps an existing hal.buffer with its associated metadata to make it easier to pass across ABI boundaries. In most cases buffer views can be elided entirely by the compiler and they'll only be seen when calling external functions.</p>"},{"location":"reference/mlir-dialects/HAL/#collectivechannel","title":"collective.channel","text":"<p>Channel identifier used to allow for participation in multiple collective groups.</p>"},{"location":"reference/mlir-dialects/HAL/#command_buffer","title":"command_buffer","text":"<p>Asynchronous command buffer recording interface. 
Commands are recorded by the implementation for later submission to command queues.</p>"},{"location":"reference/mlir-dialects/HAL/#descriptor_set_layout","title":"descriptor_set_layout","text":"<p>Descriptor set layout.</p>"},{"location":"reference/mlir-dialects/HAL/#device","title":"device","text":"<p>Logical device instance.</p>"},{"location":"reference/mlir-dialects/HAL/#event","title":"event","text":"<p>Events are used for defining synchronization scopes within CommandBuffers. An event only exists within a single CommandBuffer and must not be used across CommandBuffers from the same device or others.</p>"},{"location":"reference/mlir-dialects/HAL/#executable","title":"executable","text":"<p>A prepared and ready-to-dispatch executable.</p>"},{"location":"reference/mlir-dialects/HAL/#fence","title":"fence","text":"<p>A set of semaphore timepoints defining a common point in time across multiple timelines.</p>"},{"location":"reference/mlir-dialects/HAL/#buffer_1","title":"buffer","text":"<p>A stateless file handle that can be read/written using queue-ordered transfer operations.</p>"},{"location":"reference/mlir-dialects/HAL/#pipeline_layout","title":"pipeline_layout","text":"<p>A pipeline layout describing the descriptor sets and push constants used.</p>"},{"location":"reference/mlir-dialects/HALInline/","title":"HAL/Inline","text":""},{"location":"reference/mlir-dialects/HALInline/#hal_inline-dialect","title":"'hal_inline' Dialect","text":"<p>IREE inline HAL interop runtime module dialect.</p> <p>Low-level dialect for limited in-process ABI interop with the full HAL. Only operates synchronously, single-threaded, and on host-local buffers. 
Use the full HAL for all other cases.</p> <p>This dialect can be used alongside the full HAL but is intended for use in standalone configurations or paired with the <code>hal_loader</code> dialect which also carries the same usage restrictions.</p> <p>See <code>hal_inline.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'hal_inline' Dialect<ul> <li>Operations<ul> <li>Buffer ops<ul> <li>hal_inline.buffer.allocate.initialized (HAL::Inline::BufferAllocateInitializedOp)</li> <li>hal_inline.buffer.allocate (HAL::Inline::BufferAllocateOp)</li> <li>hal_inline.buffer.length (HAL::Inline::BufferLengthOp)</li> <li>hal_inline.buffer.storage (HAL::Inline::BufferStorageOp)</li> <li>hal_inline.buffer.subspan (HAL::Inline::BufferSubspanOp)</li> <li>hal_inline.buffer.wrap (HAL::Inline::BufferWrapOp)</li> </ul> </li> <li>Buffer view ops<ul> <li>hal_inline.buffer_view.assert (HAL::Inline::BufferViewAssertOp)</li> <li>hal_inline.buffer_view.buffer (HAL::Inline::BufferViewBufferOp)</li> <li>hal_inline.buffer_view.create (HAL::Inline::BufferViewCreateOp)</li> <li>hal_inline.buffer_view.dim (HAL::Inline::BufferViewDimOp)</li> <li>hal_inline.buffer_view.element_type (HAL::Inline::BufferViewElementTypeOp)</li> <li>hal_inline.buffer_view.encoding_type (HAL::Inline::BufferViewEncodingTypeOp)</li> <li>hal_inline.buffer_view.rank (HAL::Inline::BufferViewRankOp)</li> <li>hal_inline.buffer_view.trace (HAL::Inline::BufferViewTraceOp)</li> </ul> </li> <li>Device ops<ul> <li>hal_inline.device.query (HAL::Inline::DeviceQueryOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/HALInline/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/HALInline/#buffer-ops","title":"Buffer ops","text":"<p>Ops for <code>!hal.buffer</code> / 
<code>iree_hal_buffer_t</code>.</p>"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocateinitialized-halinlinebufferallocateinitializedop","title":"<code>hal_inline.buffer.allocate.initialized</code> (HAL::Inline::BufferAllocateInitializedOp)","text":"<p>Buffer allocation with cloning</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.allocate.initialized` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n              `alignment` `(` $minimum_alignment `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), ref($length)) `in` type($storage)\n              attr-dict-with-keyword\n</code></pre> <p>Allocates a buffer with a copy of the provided contents.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands","title":"Operands:","text":"Operand Description <code>minimum_alignment</code> index <code>source</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HALInline/#results","title":"Results:","text":"Result Description <code>result</code> buffer <code>storage</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocate-halinlinebufferallocateop","title":"<code>hal_inline.buffer.allocate</code> (HAL::Inline::BufferAllocateOp)","text":"<p>Empty buffer allocation operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.allocate` `alignment` `(` $minimum_alignment `)`\n              `:` custom&lt;SizeAwareType&gt;(type($result), $allocation_size) `in` type($storage)\n              attr-dict-with-keyword\n</code></pre> <p>Allocates a buffer of the given size. 
The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_1","title":"Operands:","text":"Operand Description <code>minimum_alignment</code> index <code>allocation_size</code> index"},{"location":"reference/mlir-dialects/HALInline/#results_1","title":"Results:","text":"Result Description <code>result</code> buffer <code>storage</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferlength-halinlinebufferlengthop","title":"<code>hal_inline.buffer.length</code> (HAL::Inline::BufferLengthOp)","text":"<p>Buffer byte length accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.length` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the allocated size of a buffer in bytes. 
May be less than the underlying buffer allocation if this is a subspan or view into another buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_2","title":"Operands:","text":"Operand Description <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferstorage-halinlinebufferstorageop","title":"<code>hal_inline.buffer.storage</code> (HAL::Inline::BufferStorageOp)","text":"<p>Buffer backing storage accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.storage` `&lt;` $buffer `:` type($buffer) `&gt;`\n              `:` type($storage)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the host backing storage of the HAL buffer as a subspan limited to the buffer's logical range (meaning that byte 0 of the returned buffer is byte 0 of the HAL buffer).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_3","title":"Operands:","text":"Operand Description <code>buffer</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#results_3","title":"Results:","text":"Result Description <code>storage</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffersubspan-halinlinebuffersubspanop","title":"<code>hal_inline.buffer.subspan</code> (HAL::Inline::BufferSubspanOp)","text":"<p>Buffer subspan 
operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.subspan` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a reference to a subspan of the buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_4","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HALInline/#results_4","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferwrap-halinlinebufferwrapop","title":"<code>hal_inline.buffer.wrap</code> (HAL::Inline::BufferWrapOp)","text":"<p>Host buffer wrapping operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer.wrap` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Tries wrapping a !hal.buffer around host memory backed by the given byte buffer.</p> <p>Interfaces: <code>OpAsmOpInterface</code>, <code>SizeAwareOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_5","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/HALInline/#results_5","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#buffer-view-ops","title":"Buffer 
view ops","text":"<p>Ops for <code>!hal.buffer_view</code> / <code>iree_hal_buffer_view_t</code>.</p>"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewassert-halinlinebufferviewassertop","title":"<code>hal_inline.buffer_view.assert</code> (HAL::Inline::BufferViewAssertOp)","text":"<p>Buffer view contents assertion</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.assert` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `message` `(` $message `)`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              attr-dict-with-keyword\n</code></pre> <p>Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if <code>std.assert</code> had been used.</p>"},{"location":"reference/mlir-dialects/HALInline/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_6","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewbuffer-halinlinebufferviewbufferop","title":"<code>hal_inline.buffer_view.buffer</code> (HAL::Inline::BufferViewBufferOp)","text":"<p>Buffer view buffer accessor</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.buffer` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the buffer backing this view's contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_7","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_6","title":"Results:","text":"Result Description <code>result</code> buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewcreate-halinlinebufferviewcreateop","title":"<code>hal_inline.buffer_view.create</code> (HAL::Inline::BufferViewCreateOp)","text":"<p>Buffer view reference initializer</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `,` $source_length `]`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. 
This makes it easier to associate commonly-carried metadata along with the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_8","title":"Operands:","text":"Operand Description <code>source_buffer</code> buffer <code>source_offset</code> index <code>source_length</code> index <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/HALInline/#results_7","title":"Results:","text":"Result Description <code>result</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewdim-halinlinebufferviewdimop","title":"<code>hal_inline.buffer_view.dim</code> (HAL::Inline::BufferViewDimOp)","text":"<p>Buffer view dimension value query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.dim` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `` `[` $index `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the value of the given dimension.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_9","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_8","title":"Results:","text":"Result Description 
<code>result</code> index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewelement_type-halinlinebufferviewelementtypeop","title":"<code>hal_inline.buffer_view.element_type</code> (HAL::Inline::BufferViewElementTypeOp)","text":"<p>Buffer view element type query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.element_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_10","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_9","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewencoding_type-halinlinebufferviewencodingtypeop","title":"<code>hal_inline.buffer_view.encoding_type</code> (HAL::Inline::BufferViewEncodingTypeOp)","text":"<p>Buffer view encoding type query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.encoding_type` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the encoding type of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_11","title":"Operands:","text":"Operand Description <code>buffer_view</code> 
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_10","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewrank-halinlinebufferviewrankop","title":"<code>hal_inline.buffer_view.rank</code> (HAL::Inline::BufferViewRankOp)","text":"<p>Buffer view rank query</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.rank` `&lt;` $buffer_view `:` type($buffer_view) `&gt;`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#operands_12","title":"Operands:","text":"Operand Description <code>buffer_view</code> buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_11","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewtrace-halinlinebufferviewtraceop","title":"<code>hal_inline.buffer_view.trace</code> (HAL::Inline::BufferViewTraceOp)","text":"<p>Trace value(s) operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.buffer_view.trace` $key `=`\n              $operands `:` type($operands)\n              attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. 
The key is informational only and useful for titling/marking specific sets of buffers for easier searching.</p>"},{"location":"reference/mlir-dialects/HALInline/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_13","title":"Operands:","text":"Operand Description <code>operands</code> variadic of buffer_view"},{"location":"reference/mlir-dialects/HALInline/#device-ops","title":"Device ops","text":"<p>Ops for <code>!hal.device</code> / <code>iree_hal_device_t</code>.</p>"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinedevicequery-halinlinedevicequeryop","title":"<code>hal_inline.device.query</code> (HAL::Inline::DeviceQueryOp)","text":"<p>Returns a runtime configuration parameter from the device</p> <p>Syntax:</p> <pre><code>operation ::= `hal_inline.device.query` `key` `(` $category `:` `` `:` $key `)`\n              `:` type($ok) `,` type($value)\n              (`=` $default_value^)?\n              attr-dict-with-keyword\n</code></pre> <p>Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and if it was the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.</p> <p>This is roughly equivalent to the <code>sysconf</code> linux syscall (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent.</p> <p>Users of the op must check the <code>ok</code> result before using the value as what set of keys is available may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. 
Keys should be prefixed with <code>ex.</code> when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.</p> <p>Well-known keys: (none yet)</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALInline/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>category</code>::mlir::StringAttrstring attribute <code>key</code>::mlir::StringAttrstring attribute <code>default_value</code>::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/HALInline/#results_12","title":"Results:","text":"Result Description <code>ok</code> 1-bit signless integer <code>value</code> any type"},{"location":"reference/mlir-dialects/HALLoader/","title":"HAL/Loader","text":""},{"location":"reference/mlir-dialects/HALLoader/#hal_loader-dialect","title":"'hal_loader' Dialect","text":"<p>IREE HAL inline executable loader runtime module dialect.</p> <p>Low-level dialect for dynamically loading executables and dispatching work. Only operates synchronously, single-threaded, and on host-local buffers. 
Use the full HAL for all other cases.</p> <p>This dialect can be used alongside the full HAL but is intended for use in conjunction with the <code>hal_inline</code> dialect which also carries the same usage restrictions.</p> <p>See <code>hal_loader.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'hal_loader' Dialect<ul> <li>Operations<ul> <li>Executable ops<ul> <li>hal_loader.executable.dispatch (HAL::Loader::ExecutableDispatchOp)</li> <li>hal_loader.executable.dispatch.symbol (HAL::Loader::ExecutableDispatchSymbolOp)</li> <li>hal_loader.executable.load (HAL::Loader::ExecutableLoadOp)</li> <li>hal_loader.executable.lookup (HAL::Loader::ExecutableLookupOp)</li> <li>hal_loader.executable.query_support (HAL::Loader::ExecutableQuerySupportOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/HALLoader/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/HALLoader/#executable-ops","title":"Executable ops","text":"<p>Ops for <code>!hal.executable</code> / <code>iree_hal_executable_t</code>.</p>"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatch-halloaderexecutabledispatchop","title":"<code>hal_loader.executable.dispatch</code> (HAL::Loader::ExecutableDispatchOp)","text":"<p>Inline executable dispatch operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.dispatch` `executable` `(` $executable `:` type($executable) `)`\n              `` `[` $entry_point `]`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              (`constants` `(` `[` $push_constants^ `]` `)`)?\n              `bindings` `(` `[`\n              custom&lt;DispatchBindings&gt;($binding_buffers,\n              type($binding_buffers),\n              $binding_offsets,\n              $binding_lengths)\n              `]` `)`\n              
attr-dict-with-keyword\n</code></pre> <p>Dispatches execution to an executable entry point with the given parameters.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HALLoader/#operands","title":"Operands:","text":"Operand Description <code>executable</code> executable <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index <code>push_constants</code> variadic of 32-bit signless integer <code>binding_buffers</code> variadic of a reference counted byte buffer <code>binding_offsets</code> variadic of index <code>binding_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatchsymbol-halloaderexecutabledispatchsymbolop","title":"<code>hal_loader.executable.dispatch.symbol</code> (HAL::Loader::ExecutableDispatchSymbolOp)","text":"<p>Inline executable dispatch operation</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.dispatch.symbol` `executable` `(` $executable `:` type($executable) `)`\n              `target` `(` $entry_point `)`\n              `workgroups` `(` `[`\n              $workgroup_x `,`\n              $workgroup_y `,`\n              $workgroup_z\n              `]` `)`\n              (`constants` `(` `[` $push_constants^ `]` `)`)?\n              `bindings` `(` `[`\n              custom&lt;DispatchBindings&gt;($binding_buffers,\n              type($binding_buffers),\n              $binding_offsets,\n              $binding_lengths)\n              `]` `)`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches execution to an executable entry point with the given parameters. 
The entry point is a symbolic reference to an exported entry point.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>SymbolUserOpInterface</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_1","title":"Operands:","text":"Operand Description <code>executable</code> executable <code>workgroup_x</code> index <code>workgroup_y</code> index <code>workgroup_z</code> index <code>push_constants</code> variadic of 32-bit signless integer <code>binding_buffers</code> variadic of a reference counted byte buffer <code>binding_offsets</code> variadic of index <code>binding_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutableload-halloaderexecutableloadop","title":"<code>hal_loader.executable.load</code> (HAL::Loader::ExecutableLoadOp)","text":"<p>Dynamically loads an executable</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.load` `format` `(` $format `)`\n              `data` `(` $data `)`\n              (`constants` `(` `[` $constants^ `]` `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates, loads, and dynamically links an executable.</p> <p>Optional constants provide for specialization of the executable based on runtime-derived parameters.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>format</code>::mlir::StringAttrstring 
attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_2","title":"Operands:","text":"Operand Description <code>data</code> a reference counted byte buffer <code>constants</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HALLoader/#results","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablelookup-halloaderexecutablelookupop","title":"<code>hal_loader.executable.lookup</code> (HAL::Loader::ExecutableLookupOp)","text":"<p>Executable cache lookup pseudo-op</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.lookup` `executable` `(` $executable `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_1","title":"Results:","text":"Result Description <code>result</code> executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablequery_support-halloaderexecutablequerysupportop","title":"<code>hal_loader.executable.query_support</code> (HAL::Loader::ExecutableQuerySupportOp)","text":"<p>Queries whether an executable format is supported</p> <p>Syntax:</p> <pre><code>operation ::= `hal_loader.executable.query_support` `format` `(` $executable_format `)`\n              `:` type($supported)\n              
attr-dict-with-keyword\n</code></pre> <p>Returns true if the given format is supported by the device loader. This does not guarantee that loading will succeed as the executable may require functionality that cannot be met by the hosting runtime environment.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/HALLoader/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>executable_format</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_2","title":"Results:","text":"Result Description <code>supported</code> 1-bit signless integer"},{"location":"reference/mlir-dialects/IOParameters/","title":"IO/Parameters","text":""},{"location":"reference/mlir-dialects/IOParameters/#io_parameters-dialect","title":"'io_parameters' Dialect","text":"<p>External parameter resource management APIs.</p> <p>Parameters are externalized storage for resources that are asynchronously accessible and device-aware. Parameters can be read or written on the same device timelines as the operations that consume or produce them and with locality pinning to ensure memory doesn't need to move. 
Parameters are referenced by a scope and a key, with the scope being optional but strongly recommended as a way to distinguish sets of parameters that may exist when multiple model parts are compiled together and would otherwise collide.</p> <p>Parameters are provided by a few operations implementing a virtual interface and can support shared parameters (same storage used in multiple contexts, or outliving a single instantiation in a context), in-memory caches, memory-mapped files (including directly using the mapped memory for execution when devices support it), <code>iree_hal_file_t</code> usage for device-supported I/O, and parameter subsetting for things like runtime sharding.</p> <p>Alongside read(+load) and write operations, gather and scatter allow for batching of large numbers of reads and writes into/from single buffers. For parameter providers that can batch operations this allows for a handful (~1-4) of calls out to perform many more operations (~thousands). Modeling the gather/scatter also gives us a point where we could extract the mapping and use it to repack files/defrag memory in the future.</p> <p>See <code>io_parameters.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'io_parameters' Dialect<ul> <li>Operations<ul> <li>Parameter I/O ops<ul> <li>io_parameters.gather (IO::Parameters::GatherOp)</li> <li>io_parameters.load (IO::Parameters::LoadOp)</li> <li>io_parameters.scatter (IO::Parameters::ScatterOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/IOParameters/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/IOParameters/#parameter-io-ops","title":"Parameter I/O ops","text":"<p>Ops for parameter I/O.</p>"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersgather-ioparametersgatherop","title":"<code>io_parameters.gather</code> (IO::Parameters::GatherOp)","text":"<p>Gathers multiple parameters from a parameter scope</p> <p>Syntax:</p> 
<pre><code>operation ::= `io_parameters.gather` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `{`\n              custom&lt;ParameterGatherOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              $target_buffer, type($target_buffer), $target_offsets, $target_lengths)\n              `}`\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously gathers one or more parameters into a single target buffer. This is equivalent to one read per parameter but allows implementations that can batch operations to do so without additional overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/IOParameters/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_offsets</code> variadic of 64-bit signless integer <code>target_buffer</code> buffer <code>target_offsets</code> variadic of index <code>target_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersload-ioparametersloadop","title":"<code>io_parameters.load</code> (IO::Parameters::LoadOp)","text":"<p>Reads one or more parameters from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `io_parameters.load` `&lt;` $device `:` type($device) `&gt;`\n              `affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `type` `(` $memory_types `)`\n        
      `usage` `(` $buffer_usage `)`\n              `{`\n              custom&lt;ParameterLoadOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              type($results), $lengths)\n              `}`\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads one or more parameters from an external parameter provider and returns the resulting buffers. Depending on the parameter and buffer types this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if an allocate + read had been used.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/IOParameters/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute <code>memory_types</code>mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType <code>buffer_usage</code>mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/IOParameters/#operands_1","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_offsets</code> variadic of 64-bit signless integer <code>lengths</code> variadic of index"},{"location":"reference/mlir-dialects/IOParameters/#results","title":"Results:","text":"Result Description <code>results</code> variadic of buffer"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersscatter-ioparametersscatterop","title":"<code>io_parameters.scatter</code> (IO::Parameters::ScatterOp)","text":"<p>Scatters multiple parameters to a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `io_parameters.scatter` `&lt;` $device `:` type($device) `&gt;`\n              
`affinity` `(` $queue_affinity `)`\n              `wait` `(` $wait_fence `)`\n              `signal` `(` $signal_fence `)`\n              `{`\n              custom&lt;ParameterScatterOperations&gt;(\n              $source_buffer, type($source_buffer), $source_offsets, $source_lengths,\n              $target_scope, $target_keys, $target_offsets)\n              `}`\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously scatters one or more parameters from a single source buffer into one or more parameters. This is equivalent to one write per parameter but allows implementations that can batch operations to do so without additional overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/IOParameters/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_scope</code>::mlir::StringAttrstring attribute <code>target_keys</code>::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands_2","title":"Operands:","text":"Operand Description <code>device</code> device <code>queue_affinity</code> 64-bit signless integer <code>wait_fence</code> fence <code>signal_fence</code> fence <code>source_buffer</code> buffer <code>source_offsets</code> variadic of index <code>source_lengths</code> variadic of index <code>target_offsets</code> variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/IREEInput/","title":"IREEInput","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_input-dialect","title":"'iree_input' Dialect","text":"<p>Public ops/type/attributes legal for input to IREE's compiler.</p> <p>IREE's compiler allows as input a number of common dialects. This dialect contains structural and unique ops that do not exist elsewhere or that IREE has an interest in maintaining as a stable set.</p> <p>The contents of this dialect often mirror various constructs in IREE's internal implementation. 
The focus here is on simplicity and stability over time. Generally, this dialect does not use \"advanced\" features and should be broadly source compatible over a range of LLVM versions. There are, of course, limits, and source-compatibility is not guaranteed, since LLVM/MLIR's API surface is itself unstable.</p> <ul> <li>'iree_input' Dialect<ul> <li>Operations<ul> <li>Buffer and buffer view ops<ul> <li>iree_input.buffer.subspan (Input::BufferSubspanOp)</li> <li>iree_input.buffer_view.create (Input::BufferViewCreateOp)</li> <li>iree_input.buffer_view.dim (Input::BufferViewDimOp)</li> <li>iree_input.buffer_view.rank (Input::BufferViewRankOp)</li> </ul> </li> <li>Byte buffer ops<ul> <li>iree_input.byte_buffer.constant (Input::ByteBufferConstantOp)</li> </ul> </li> <li>Compiler hint ops<ul> <li>iree_input.optimization_barrier (Input::OptimizationBarrierOp)</li> </ul> </li> <li>Dispatch ops<ul> <li>iree_input.dispatch (Input::DispatchOp)</li> </ul> </li> <li>Executable source ops<ul> <li>iree_input.executable.export (Input::ExecutableExportOp)</li> <li>iree_input.executable.source_end (Input::ExecutableSourceEndOp)</li> <li>iree_input.executable.source (Input::ExecutableSourceOp)</li> </ul> </li> <li>Global variable ops<ul> <li>iree_input.global.address (Input::GlobalAddressOp)</li> <li>iree_input.global.load.indirect (Input::GlobalLoadIndirectOp)</li> <li>iree_input.global.load (Input::GlobalLoadOp)</li> <li>iree_input.global (Input::GlobalOp)</li> <li>iree_input.global.store.indirect (Input::GlobalStoreIndirectOp)</li> <li>iree_input.global.store (Input::GlobalStoreOp)</li> </ul> </li> <li>Mutable list ops<ul> <li>iree_input.list.create (Input::ListCreateOp)</li> <li>iree_input.list.get (Input::ListGetOp)</li> <li>iree_input.list.resize (Input::ListResizeOp)</li> <li>iree_input.list.set (Input::ListSetOp)</li> <li>iree_input.list.size (Input::ListSizeOp)</li> </ul> </li> <li>Pseudo ops for conversion support<ul> <li>iree_input.tensor.export 
(Input::TensorExportOp)</li> <li>iree_input.tensor.import (Input::TensorImportOp)</li> </ul> </li> <li>Tensor ops<ul> <li>iree_input.tensor.bitcast (Input::TensorBitCastOp)</li> <li>iree_input.tensor.clone (Input::TensorCloneOp)</li> <li>iree_input.tensor.load (Input::TensorLoadOp)</li> <li>iree_input.tensor.reshape (Input::TensorReshapeOp)</li> <li>iree_input.tensor.slice (Input::TensorSliceOp)</li> <li>iree_input.tensor.splat (Input::TensorSplatOp)</li> <li>iree_input.tensor.store (Input::TensorStoreOp)</li> <li>iree_input.tensor.trace (Input::TensorTraceOp)</li> <li>iree_input.tensor.update (Input::TensorUpdateOp)</li> </ul> </li> <li>Utility ops<ul> <li>iree_input.align (Input::AlignOp)</li> <li>iree_input.null (Input::NullOp)</li> </ul> </li> <li>Workgroup dispatch ops<ul> <li>iree_input.dispatch.workgroup.count (Input::DispatchWorkgroupCountOp)</li> <li>iree_input.dispatch.workgroup.id (Input::DispatchWorkgroupIDOp)</li> <li>iree_input.dispatch.workgroup.size (Input::DispatchWorkgroupSizeOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>DescriptorSetBindingAttr</li> <li>DescriptorSetLayoutAttr</li> <li>DescriptorTypeAttr</li> <li>DeviceTargetAttr</li> <li>ExecutableObjectAttr</li> <li>ExecutableObjectsAttr</li> <li>ExecutableTargetAttr</li> <li>PipelineLayoutAttr</li> </ul> </li> <li>Type constraints<ul> <li>list</li> </ul> </li> <li>Types<ul> <li>BufferType</li> <li>BufferViewType</li> <li>ByteBufferType</li> <li>ListType</li> <li>PtrType</li> <li>VariantType</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/IREEInput/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffer-and-buffer-view-ops","title":"Buffer and buffer view ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffersubspan-inputbuffersubspanop","title":"<code>iree_input.buffer.subspan</code> (Input::BufferSubspanOp)","text":"<p>Buffer subspan operation</p> <p>Syntax:</p> <pre><code>operation ::= 
`iree_input.buffer.subspan` `&lt;` $source_buffer `:` type($source_buffer) `&gt;`\n              `` `[` $source_offset `,` $length `]`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a reference to a subspan of the buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands","title":"Operands:","text":"Operand Description <code>source_buffer</code> Buffer is an untyped bag of bits with no shape or dtype <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/IREEInput/#results","title":"Results:","text":"Result Description <code>result</code> Buffer is an untyped bag of bits with no shape or dtype"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewcreate-inputbufferviewcreateop","title":"<code>iree_input.buffer_view.create</code> (Input::BufferViewCreateOp)","text":"<p>Buffer view reference initializer</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n              `` `[` $source_offset `,` $source_length `]`\n              `shape` `(` `[` $shape `]` `)`\n              `type` `(` $element_type `)`\n              `encoding` `(` $encoding_type `)`\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. 
This makes it easier to associate commonly-carried metadata along with the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_1","title":"Operands:","text":"Operand Description <code>source_buffer</code> Buffer is an untyped bag of bits with no shape or dtype <code>source_offset</code> index <code>source_length</code> index <code>element_type</code> 32-bit signless integer <code>encoding_type</code> 32-bit signless integer <code>shape</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_1","title":"Results:","text":"Result Description <code>result</code> View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewdim-inputbufferviewdimop","title":"<code>iree_input.buffer_view.dim</code> (Input::BufferViewDimOp)","text":"<p>Buffer view dimension value query</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.buffer_view.dim` $buffer_view `,` $index attr-dict `:` type($result)\n</code></pre> <p>Returns the value of the given dimension.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>index</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_2","title":"Operands:","text":"Operand Description <code>buffer_view</code> View into a buffer, with runtime shape and element 
type"},{"location":"reference/mlir-dialects/IREEInput/#results_2","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewrank-inputbufferviewrankop","title":"<code>iree_input.buffer_view.rank</code> (Input::BufferViewRankOp)","text":"<p>Buffer view rank query</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.buffer_view.rank` $buffer_view attr-dict `:` type($result)\n</code></pre> <p>Returns the rank of the buffer view.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_3","title":"Operands:","text":"Operand Description <code>buffer_view</code> View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#results_3","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#byte-buffer-ops","title":"Byte buffer ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbyte_bufferconstant-inputbytebufferconstantop","title":"<code>iree_input.byte_buffer.constant</code> (Input::ByteBufferConstantOp)","text":"<p>Constant host-side byte buffer</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.byte_buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n</code></pre> <p>Defines a compile-time byte buffer based on the given attribute value. 
The attribute will be serialized into the canonical IREE format for the chosen host target.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::StringAttrstring attribute <code>alignment</code>::mlir::IntegerAttrindex attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_4","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/IREEInput/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputoptimization_barrier-inputoptimizationbarrierop","title":"<code>iree_input.optimization_barrier</code> (Input::OptimizationBarrierOp)","text":"<p>Prevents compiler optimizations across a value.</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.optimization_barrier` attr-dict\n              ($operands^ `:` type($operands))?\n</code></pre> <p>Wraps any operands in an unoptimizable identity to prevent its results from being folded. 
It will be dropped during the final step in compilation and has no effect at runtime.</p> <p>Traits: <code>SameOperandsAndResultType</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_4","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#results_5","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatch-inputdispatchop","title":"<code>iree_input.dispatch</code> (Input::DispatchOp)","text":"<p>A dispatch of an executable across a grid</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch` $entry_point\n              (`[` $workload^ `]`)? ``\n              `(` $arguments `)` attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($arguments),\n              type($arguments), $argument_dims,\n              type($results), $result_dims,\n              $tied_operands)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_point</code>::mlir::SymbolRefAttrsymbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_5","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>arguments</code> variadic of any type <code>argument_dims</code> variadic of index <code>result_dims</code> variadic of 
index"},{"location":"reference/mlir-dialects/IREEInput/#results_6","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#executable-source-ops","title":"Executable source ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutableexport-inputexecutableexportop","title":"<code>iree_input.executable.export</code> (Input::ExecutableExportOp)","text":"<p>Executable entry point declaration</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.executable.export` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              `ordinal` `(` $ordinal `)`\n              `layout` `(` $layout `)`\n              attr-dict-with-keyword\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Input::ExecutableSourceOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrsize_t <code>layout</code>::mlir::iree_compiler::IREE::Input::PipelineLayoutAttrexecutable entry point layout specification <code>workgroup_size</code>::mlir::ArrayAttrindex array attribute <code>subgroup_size</code>::mlir::IntegerAttrsize_t <code>workgroup_local_memory</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource_end-inputexecutablesourceendop","title":"<code>iree_input.executable.source_end</code> (Input::ExecutableSourceEndOp)","text":"<p>Terminator pseudo-op for the executable source op</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.executable.source_end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Input::ExecutableSourceOp&gt;</code>, 
<code>Terminator</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource-inputexecutablesourceop","title":"<code>iree_input.executable.source</code> (Input::ExecutableSourceOp)","text":"<p>Generic source contents of an executable op</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.executable.source` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              ``\n              regions\n</code></pre> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Input::ExecutableSourceEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>objects</code>::mlir::iree_compiler::IREE::Input::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/IREEInput/#global-variable-ops","title":"Global variable ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobaladdress-inputglobaladdressop","title":"<code>iree_input.global.address</code> (Input::GlobalAddressOp)","text":"<p>Returns an address reference to a global</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.address` $global attr-dict `:` type($result)\n</code></pre> <p>Returns the address of a global as a typed reference. 
Can be used with the global load and store indirect ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_7","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputgloballoadindirect-inputgloballoadindirectop","title":"<code>iree_input.global.load.indirect</code> (Input::GlobalLoadIndirectOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.load.indirect` $global attr-dict `:` type($global) `-&gt;` type($result)\n</code></pre> <p>Returns a copy of the global value.</p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_6","title":"Operands:","text":"Operand Description <code>global</code> ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#results_8","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalload-inputgloballoadop","title":"<code>iree_input.global.load</code> (Input::GlobalLoadOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.load` $global attr-dict `:` type($result)\n</code></pre> <p>Returns a copy of the global value.</p> <p>Interfaces: 
<code>SymbolUserOpInterface</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_9","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobal-inputglobalop","title":"<code>iree_input.global</code> (Input::GlobalOp)","text":"<p>Stateful global variable declaration</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              (`initializer` `(` $initializer^ `)`)?\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Declares a global variable that maintains its value across invocations. The value is tied to the execution context of the module and different contexts will have different global storage.</p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initializer</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>initial_value</code>::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstoreindirect-inputglobalstoreindirectop","title":"<code>iree_input.global.store.indirect</code> (Input::GlobalStoreIndirectOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.store.indirect` $value `,` $global attr-dict `:` type($value) `-&gt;` 
type($global)\n</code></pre> <p>Stores a copy of the value into a global.</p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_7","title":"Operands:","text":"Operand Description <code>value</code> any type <code>global</code> ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstore-inputglobalstoreop","title":"<code>iree_input.global.store</code> (Input::GlobalStoreOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.global.store` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a copy of the value into a global.</p> <p>Interfaces: <code>SymbolUserOpInterface</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_8","title":"Operands:","text":"Operand Description <code>value</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#mutable-list-ops","title":"Mutable list ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistcreate-inputlistcreateop","title":"<code>iree_input.list.create</code> (Input::ListCreateOp)","text":"<p>Creates a new empty list</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.create` ($initial_capacity^)? 
attr-dict `:` type($result)\n</code></pre> <p>Creates a new empty list with an optional initial capacity.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_9","title":"Operands:","text":"Operand Description <code>initial_capacity</code> index"},{"location":"reference/mlir-dialects/IREEInput/#results_10","title":"Results:","text":"Result Description <code>result</code> list"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistget-inputlistgetop","title":"<code>iree_input.list.get</code> (Input::ListGetOp)","text":"<p>Element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.get` $list `[` $index `]` attr-dict `:` type($list) `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index. 
Note that the value may be null if the element is null or the type does not match.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_10","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> index"},{"location":"reference/mlir-dialects/IREEInput/#results_11","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistresize-inputlistresizeop","title":"<code>iree_input.list.resize</code> (Input::ListResizeOp)","text":"<p>Resizes the list to a new count in elements</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.resize` operands attr-dict `:` type($list)\n</code></pre> <p>Resizes the list to contain <code>new_size</code> elements. This will either truncate the list if the existing size is greater than <code>new_size</code> or extend the list with the default list value of the element type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_11","title":"Operands:","text":"Operand Description <code>list</code> list <code>new_size</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistset-inputlistsetop","title":"<code>iree_input.list.set</code> (Input::ListSetOp)","text":"<p>Element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.set` $list `[` $index `]` `,` $value attr-dict `:` type($list) `,` type($value)\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_12","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> index <code>value</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistsize-inputlistsizeop","title":"<code>iree_input.list.size</code> (Input::ListSizeOp)","text":"<p>The size of the list in elements</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.list.size` operands attr-dict `:` type($list)\n</code></pre> <p>Returns the current size of the list in elements.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_13","title":"Operands:","text":"Operand Description <code>list</code> list"},{"location":"reference/mlir-dialects/IREEInput/#results_12","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#pseudo-ops-for-conversion-support","title":"Pseudo ops for conversion support","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorexport-inputtensorexportop","title":"<code>iree_input.tensor.export</code> (Input::TensorExportOp)","text":"<p>Exports a tensor to a Buffer(View), capturing dynamic dims</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.export` $source `:` type($source) (`{` $source_dims^ `}`)? 
`-&gt;` type($target)\n              attr-dict-with-keyword\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_14","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_13","title":"Results:","text":"Result Description <code>target</code> Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorimport-inputtensorimportop","title":"<code>iree_input.tensor.import</code> (Input::TensorImportOp)","text":"<p>Imports a Buffer(View) to a tensor, providing dynamic dims</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.import` $source `:` type($source) `-&gt;` type($target) (`{` $target_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_15","title":"Operands:","text":"Operand Description <code>source</code> Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type <code>target_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_14","title":"Results:","text":"Result Description <code>target</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#tensor-ops","title":"Tensor 
ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorbitcast-inputtensorbitcastop","title":"<code>iree_input.tensor.bitcast</code> (Input::TensorBitCastOp)","text":"<p>Bitcasts a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.bitcast` $source `:`\n              type($source) (`{` $source_dims^ `}`)? `-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Bitcasts a tensor to a new shape without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_16","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_15","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorclone-inputtensorcloneop","title":"<code>iree_input.tensor.clone</code> (Input::TensorCloneOp)","text":"<p>Performs a full tensor clone operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.clone` $operand `:` type($result) (`{` $operand_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones the input tensor into an identical output tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_17","title":"Operands:","text":"Operand Description <code>operand</code> ranked tensor of any type values <code>operand_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_16","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorload-inputtensorloadop","title":"<code>iree_input.tensor.load</code> (Input::TensorLoadOp)","text":"<p>Loads a value from a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.load` $source (`[` $indices^ `]`)? `:`\n              type($source) (`{` $source_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_18","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_17","title":"Results:","text":"Result Description <code>result</code> index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorreshape-inputtensorreshapeop","title":"<code>iree_input.tensor.reshape</code> (Input::TensorReshapeOp)","text":"<p>Reshapes a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.reshape` $source `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Reshapes a tensor to a new shape without modifying the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_19","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_18","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorslice-inputtensorsliceop","title":"<code>iree_input.tensor.slice</code> (Input::TensorSliceOp)","text":"<p>Slices out a subregion of a tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n              type($source) (`{` $source_dims^ `}`)? 
`-&gt;`\n              type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Clones a subregion of a tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_20","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values <code>source_dims</code> variadic of index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_19","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorsplat-inputtensorsplatop","title":"<code>iree_input.tensor.splat</code> (Input::TensorSplatOp)","text":"<p>Splats a value into a shaped tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor initialized to the given primitive value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_21","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type <code>result_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_20","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type 
values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorstore-inputtensorstoreop","title":"<code>iree_input.tensor.store</code> (Input::TensorStoreOp)","text":"<p>Stores a value into a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n              type($target) (`{` $target_dims^ `}`)?\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with the element at the given index set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_22","title":"Operands:","text":"Operand Description <code>value</code> index or signless integer or floating-point or complex-type or vector of any type values <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_21","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensortrace-inputtensortraceop","title":"<code>iree_input.tensor.trace</code> (Input::TensorTraceOp)","text":"<p>Traces one or more tensor values at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.trace` $key `=` `[`\n              custom&lt;ShapedOperandList&gt;($values, type($values), $value_dims)\n              `]` attr-dict-with-keyword\n</code></pre> <p>Traces out to a runtime trace sink (console, log file, etc) the given tensors. 
The key is arbitrary and can be used for identifying the set of values being traced.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_23","title":"Operands:","text":"Operand Description <code>values</code> variadic of ranked tensor of any type values <code>value_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorupdate-inputtensorupdateop","title":"<code>iree_input.tensor.update</code> (Input::TensorUpdateOp)","text":"<p>Updates a tensor with the contents of another tensor</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n              type($update) (`{` $update_dims^ `}`)? `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($result), $target_dims)\n              attr-dict-with-keyword\n</code></pre> <p>Updates the target tensor with the contents of the update tensor at the given offset indices.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_24","title":"Operands:","text":"Operand Description <code>target</code> ranked tensor of any type values <code>target_dims</code> variadic of index <code>start_indices</code> variadic of index <code>update</code> ranked tensor of any type values <code>update_dims</code> variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_22","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any 
type values"},{"location":"reference/mlir-dialects/IREEInput/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputalign-inputalignop","title":"<code>iree_input.align</code> (Input::AlignOp)","text":"<p>Aligns up to a power-of-two alignment if required</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.align` $value `,` $alignment attr-dict `:` type($result)\n</code></pre> <p>Aligns |value| up to the given power-of-two |alignment| if required.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#operands_25","title":"Operands:","text":"Operand Description <code>value</code> signless-integer-like <code>alignment</code> signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#results_23","title":"Results:","text":"Result Description <code>result</code> signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputnull-inputnullop","title":"<code>iree_input.null</code> (Input::NullOp)","text":"<p>A null value</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.null` attr-dict `:` type($result)\n</code></pre> <p>Initializes reference and variant types with a null value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#results_24","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/IREEInput/#workgroup-dispatch-ops","title":"Workgroup dispatch 
ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupcount-inputdispatchworkgroupcountop","title":"<code>iree_input.dispatch.workgroup.count</code> (Input::DispatchWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid.</p> <p>Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).</p> <pre><code>%x = iree_input.dispatch.workgroup.count[0] : index\n%y = iree_input.dispatch.workgroup.count[1] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_25","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupid-inputdispatchworkgroupidop","title":"<code>iree_input.dispatch.workgroup.id</code> (Input::DispatchWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current workgroup in the range of <code>[0, iree_input.dispatch.workgroup.count)</code> along each dimension.</p> <p>Corresponds to the 
<code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).</p> <pre><code>%x = iree_input.dispatch.workgroup.id[0] : index\n%y = iree_input.dispatch.workgroup.id[1] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_26","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupsize-inputdispatchworkgroupsizeop","title":"<code>iree_input.dispatch.workgroup.size</code> (Input::DispatchWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `iree_input.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Workgroup sizes are not determined at the iree dialect level as they are dependent on the target backend determined when lowering into the HAL. 
It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.</p> <p>Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).</p> <pre><code>%x = iree_input.dispatch.workgroup.size[0] : index\n%y = iree_input.dispatch.workgroup.size[1] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEInput/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_27","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/IREEInput/#attributes_13","title":"Attributes","text":""},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"<p>descriptor set binding specification</p> <p>Syntax:</p> <pre><code>#iree_input.descriptor_set.binding&lt;\n  int64_t,   # ordinal\n  DescriptorType,   # type\n  std::optional&lt;DescriptorFlags&gt;   # flags\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/IREEInput/#parameters","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> type <code>DescriptorType</code> flags <code>std::optional&lt;DescriptorFlags&gt;</code>"},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"<p>descriptor set layout specification</p> <p>Syntax:</p> <pre><code>#iree_input.descriptor_set.layout&lt;\n  int64_t,   # ordinal\n  
::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;,   # bindings\n  std::optional&lt;DescriptorSetLayoutFlags&gt;   # flags\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description ordinal <code>int64_t</code> bindings <code>::llvm::ArrayRef&lt;DescriptorSetBindingAttr&gt;</code> flags <code>std::optional&lt;DescriptorSetLayoutFlags&gt;</code>"},{"location":"reference/mlir-dialects/IREEInput/#descriptortypeattr","title":"DescriptorTypeAttr","text":"<p>valid DescriptorType</p> <p>Syntax:</p> <pre><code>#iree_input.descriptor_type&lt;\n  ::mlir::iree_compiler::IREE::Input::DescriptorType   # value\n&gt;\n</code></pre> <p>Enum cases: * uniform_buffer (<code>UniformBuffer</code>) * storage_buffer (<code>StorageBuffer</code>)</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description value <code>::mlir::iree_compiler::IREE::Input::DescriptorType</code> an enum of type DescriptorType"},{"location":"reference/mlir-dialects/IREEInput/#devicetargetattr","title":"DeviceTargetAttr","text":"<p>generic device target specification</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description deviceID <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectattr","title":"ExecutableObjectAttr","text":"<p>executable object reference</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description path <code>StringAttr</code> data <code>DenseIntElementsAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"<p>target-specific object file references</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_5","title":"Parameters:","text":"Parameter C++ 
type Description targets <code>ArrayAttr</code> targetObjects <code>ArrayAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#executabletargetattr","title":"ExecutableTargetAttr","text":"<p>generic executable target specification</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description backend <code>StringAttr</code> format <code>StringAttr</code> configuration <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/IREEInput/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"<p>executable entry point layout specification</p> <p>Syntax:</p> <pre><code>#iree_input.pipeline.layout&lt;\n  int64_t,   # pushConstants\n  ::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;   # setLayouts\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description pushConstants <code>int64_t</code> setLayouts <code>::llvm::ArrayRef&lt;DescriptorSetLayoutAttr&gt;</code>"},{"location":"reference/mlir-dialects/IREEInput/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/IREEInput/#list","title":"list","text":"<p>A mutable, resizable list of some type.</p>"},{"location":"reference/mlir-dialects/IREEInput/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffertype","title":"BufferType","text":"<p>Buffer is an untyped bag of bits with no shape or dtype</p> <p>Syntax: <code>!iree_input.buffer</code></p> <p>Buffers represent an untyped bag of bits that can be reinterpreted depending on a use case using <code>buffer_view</code> operation. Buffers can be used for packing multiple tensors into the same underlying storage. 
It is left to higher level code to decide how exactly tensors are laid out in the buffer.</p>"},{"location":"reference/mlir-dialects/IREEInput/#bufferviewtype","title":"BufferViewType","text":"<p>View into a buffer, with runtime shape and element type</p> <p>Syntax: <code>!iree_input.buffer_view</code></p> <p>BufferViews represent views onto backing IREE runtime Buffer objects, adding runtime shape and element type parameters to the backing buffer. BufferViews are typically accepted and returned at boundaries with external code.</p> <p>In the runtime and lower level compiler, BufferViews are fully modeled; however, as boundary types, not all features are exposed publicly. Since within compiled tensor programs, it is typical to operate in terms of fully typed tensors, the primary mechanism for getting or using a BufferView at the high level is by casting to/from a tensor. It is left to higher level code to ensure that aliasing rules are enforced at such boundaries.</p>"},{"location":"reference/mlir-dialects/IREEInput/#bytebuffertype","title":"ByteBufferType","text":"<p>a reference counted byte buffer</p> <p>Syntax: <code>!iree_input.byte_buffer</code></p> <p>A reference counted byte buffer that models a pointer, offset, and length.</p>"},{"location":"reference/mlir-dialects/IREEInput/#listtype","title":"ListType","text":"<p>A one dimensional list of runtime values</p> <p>Represents a list of arbitrary type. Primitive types can be expected to be efficiently stored in an unboxed form. 
Reference types and variants are permitted.</p> <p>Lists can either be homogenous, with a fixed element type, or heterogenous by parameterizing them with a VariantType.</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description elementType <code>::mlir::Type</code> A type suitable as an element type of a container"},{"location":"reference/mlir-dialects/IREEInput/#ptrtype","title":"PtrType","text":"<p>Pointer to a concrete type</p>"},{"location":"reference/mlir-dialects/IREEInput/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description targetType <code>::mlir::Type</code> A type suitable as a target type of a pointer"},{"location":"reference/mlir-dialects/IREEInput/#varianttype","title":"VariantType","text":"<p>Represents any legal or reference type in the IREE runtime</p> <p>Syntax: <code>!iree_input.variant</code></p> <p>The variant type is typically used to parameterize container types that can contain any legal primitive, reference or null in the IREE type system.</p>"},{"location":"reference/mlir-dialects/IREEVectorExt/","title":"IREEVectorExt","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_ext-dialect","title":"'iree_vector_ext' Dialect","text":"<p>IREE Vector Extensions.</p> <p>A dialect designed for experimenting with vector operations beyond what is currently available in the Vector Dialect.</p> <ul> <li>'iree_vector_ext' Dialect<ul> <li>Operations<ul> <li>iree_vector_ext.layout_conflict_resolution (VectorExt::LayoutConflictResolutionOp)</li> <li>iree_vector_ext.to_simd (VectorExt::ToSIMDOp)</li> <li>iree_vector_ext.to_simt (VectorExt::ToSIMTOp)</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/IREEVectorExt/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extlayout_conflict_resolution-vectorextlayoutconflictresolutionop","title":"<code>iree_vector_ext.layout_conflict_resolution</code> (VectorExt::LayoutConflictResolutionOp)","text":"<p>Layout Conflict Resolution operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_vector_ext.layout_conflict_resolution` $input attr-dict `:` type($input) `-&gt;` type($output)\n</code></pre> <p>The layout conflict resolution operator takes a vector and a desired layout and transforms the vector to one with the desired layout.</p>"},{"location":"reference/mlir-dialects/IREEVectorExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sourceLayout</code>::mlir::iree_compiler::IREE::VectorExt::VectorLayoutInterfaceVectorLayoutInterface instance <code>desiredLayout</code>::mlir::iree_compiler::IREE::VectorExt::VectorLayoutInterfaceVectorLayoutInterface instance"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands","title":"Operands:","text":"Operand Description <code>input</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results","title":"Results:","text":"Result Description <code>output</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extto_simd-vectorexttosimdop","title":"<code>iree_vector_ext.to_simd</code> (VectorExt::ToSIMDOp)","text":"<p>SIMT to SIMD conversion operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_vector_ext.to_simd` $input attr-dict `:` type($input) `-&gt;` type($output)\n</code></pre> <p>This operation is a temporary operation useful for source/target materializations when doing type conversions between distributed and not distributed vectors.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultElementType</code></p> <p>Interfaces: 
<code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands_1","title":"Operands:","text":"Operand Description <code>input</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results_1","title":"Results:","text":"Result Description <code>output</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extto_simt-vectorexttosimtop","title":"<code>iree_vector_ext.to_simt</code> (VectorExt::ToSIMTOp)","text":"<p>SIMD to SIMT conversion operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_vector_ext.to_simt` $input attr-dict `:` type($input) `-&gt;` type($output)\n</code></pre> <p>This operation is a temporary operation useful for source/target materializations when doing type conversions between distributed and not distributed vectors.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultElementType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands_2","title":"Operands:","text":"Operand Description <code>input</code> vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results_2","title":"Results:","text":"Result Description <code>output</code> vector of any type values"},{"location":"reference/mlir-dialects/LinalgExt/","title":"LinalgExt","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_ext-dialect","title":"'iree_linalg_ext' Dialect","text":"<p>IREE Linalg Extensions.</p> <p>A dialect designed for experimenting with non-structured operations that cannot be represented efficiently/directly by the Linalg dialect.</p> <ul> <li>'iree_linalg_ext' Dialect<ul> 
<li>Operations<ul> <li>Data tiling ops<ul> <li>iree_linalg_ext.pack (LinalgExt::PackOp)</li> <li>iree_linalg_ext.set_encoding (LinalgExt::SetEncodingOp)</li> <li>iree_linalg_ext.unpack (LinalgExt::UnPackOp)</li> <li>iree_linalg_ext.unset_encoding (LinalgExt::UnsetEncodingOp)</li> <li>iree_linalg_ext.upper_bound_tile_size (LinalgExt::UpperBoundTileSizeOp)</li> </ul> </li> <li>Non-structured ops<ul> <li>iree_linalg_ext.attention (LinalgExt::AttentionOp)</li> <li>iree_linalg_ext.fft (LinalgExt::FftOp)</li> <li>iree_linalg_ext.reverse (LinalgExt::ReverseOp)</li> <li>iree_linalg_ext.scan (LinalgExt::ScanOp)</li> <li>iree_linalg_ext.scatter (LinalgExt::ScatterOp)</li> <li>iree_linalg_ext.sort (LinalgExt::SortOp)</li> <li>iree_linalg_ext.topk (LinalgExt::TopkOp)</li> </ul> </li> <li>Utility ops<ul> <li>iree_linalg_ext.yield (LinalgExt::YieldOp)</li> </ul> </li> <li>Winograd ops<ul> <li>iree_linalg_ext.winograd.input_transform (LinalgExt::WinogradInputTransformOp)</li> <li>iree_linalg_ext.winograd.output_transform (LinalgExt::WinogradOutputTransformOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>EncodingAttr</li> <li>EncodingRoleAttr</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/LinalgExt/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/LinalgExt/#data-tiling-ops","title":"Data tiling ops","text":"<p>Operations for working with data layouts, padding, encodings, and other properties useful for tiling computations across iteration space dimensions.</p>"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extpack-linalgextpackop","title":"<code>iree_linalg_ext.pack</code> (LinalgExt::PackOp)","text":"<p>Pack operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.pack` attr-dict\n              $inputs\n              (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?\n              (`outer_dims_perm` `=` $outer_dims_perm^)?\n              `inner_dims_pos` `=` 
$inner_dims_pos\n              `inner_tiles` `=`\n              custom&lt;DynamicIndexList&gt;($inner_tiles, $static_inner_tiles)\n              `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n              (`-&gt;` type($results)^)?\n</code></pre> <p>The pack operation converts an <code>input</code> into a tiled and packed layout. The dimensions to be tiled are obtained from <code>inner_dims_pos</code> and the size of the tile is obtained from <code>inner_tiles</code>. The dimensions listed in <code>inner_dims_pos</code> do not need to be contiguous in which case the tile will get transposed.  We handle only full tiles if <code>padding_value</code> is not set; it is UB if the tile does not perfectly divide the dimension. If <code>padding_value</code> is set, it will pad along high dimensions, i.e., it pads at the bottom and on the right if the input has rank 2, and the result type shape, will be dynamic in any dimension if and only if the input shape is. As optional input, the operation takes <code>outer_dims_perm</code> that allows to permute the tiled loops.</p> <p>Example KC_to_KCck:</p> <pre><code>iree_linalg_ext.pack %arg0 inner_dims_pos = [1, 0]\n  inner_tiles = [32, 8] into %arg1 : (memref&lt;128x256xf32&gt; memref&lt;16x8x32x8xf32&gt;)\n</code></pre> <p>Example NC_to_NCnc:</p> <p><pre><code>iree_linalg_ext.pack %arg0 inner_dims_pos = [0, 1]\n  inner_tiles = [8, 32] into %arg1 : (memref&lt;128x256xf32&gt; memref&lt;16x8x8x32xf32&gt;)\n</code></pre> Example KC_to_CKkc</p> <pre><code>iree_linalg_ext.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n  inner_tiles = [32, 8] into %arg1 : (memref&lt;128x256xf32&gt; memref&lt;32x4x32x8xf32&gt;)\n</code></pre> <p>In all cases, dimension at position 0 in the input memref (128) is tiled with a factor of 8, while dimension at position 1 (256) is tiled with a factor of 32. 
In the KC_to_KCck example, the point loops are interchanged, while in the KC_to_CKkc example the tiled loops.</p> <p>Example NC_to_NCnc with padding:</p> <pre><code>iree_linalg_ext.pack %arg padding_value(%pad : f32) inner_dims_pos = [0, 1]\n  inner_tiles = [8, 2] into %arg1 : (memref&lt;13x15xf32&gt; memref&lt;2x8x8x2xf32&gt;)\n</code></pre> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>outer_dims_perm</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>inner_dims_pos</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_inner_tiles</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values <code>inner_tiles</code> variadic of index <code>padding_value</code> any type"},{"location":"reference/mlir-dialects/LinalgExt/#results","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extset_encoding-linalgextsetencodingop","title":"<code>iree_linalg_ext.set_encoding</code> (LinalgExt::SetEncodingOp)","text":"<p>Perform pack and pad operation on source</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.set_encoding` attr-dict $source `:` type($source) `-&gt;` type($result)\n</code></pre> <p>Operation to assign an 
encoding to a tensor. The operation does not change the rank or extent of a tensor. Instead it adds an encoding attribute to the tensor type to represent a change in layout.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_1","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_1","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extunpack-linalgextunpackop","title":"<code>iree_linalg_ext.unpack</code> (LinalgExt::UnPackOp)","text":"<p>Unpack operation</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.unpack` attr-dict\n              $inputs\n              (`outer_dims_perm` `=` $outer_dims_perm^)?\n              `inner_dims_pos` `=` $inner_dims_pos\n              `inner_tiles` `=`\n              custom&lt;DynamicIndexList&gt;($inner_tiles, $static_inner_tiles)\n              `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n              (`-&gt;` type($results)^)?\n</code></pre> <p>The unpack operation converts a tiled and packed input to an unpacked output. See <code>pack</code> for more details on <code>inner_tiles</code> and <code>dims_pos</code>; it is UB if the tile does not perfectly divide the dimension. 
Optionally, the operation also supports permuting the tiled loops.</p> <p>Example KCck_to_KC:</p> <pre><code>iree_linalg_ext.unpack %arg0 dims_pos = [1, 0]\n  inner_tiles = [32, 8] into %arg1 : (memref&lt;16x8x32x8xf32&gt; memref&lt;128x256xf32&gt;)\n</code></pre> <p>Example NCnc_to_NC:</p> <pre><code>iree_linalg_ext.unpack %arg0 dims_pos = [0, 1]\n  inner_tiles = [8, 32] into %arg1 : (memref&lt;16x8x8x32xf32&gt; memref&lt;128x256xf32&gt;)\n</code></pre> <p>Example CKkc_to_KC:</p> <pre><code>iree_linalg_ext.unpack %arg1 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n  inner_tiles = [32, 8] into %arg0 : (memref&lt;32x4x32x8xf32&gt; memref&lt;128x256xf32&gt;)\n</code></pre> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>outer_dims_perm</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>inner_dims_pos</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>static_inner_tiles</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_2","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values <code>inner_tiles</code> variadic of index"},{"location":"reference/mlir-dialects/LinalgExt/#results_2","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type 
values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extunset_encoding-linalgextunsetencodingop","title":"<code>iree_linalg_ext.unset_encoding</code> (LinalgExt::UnsetEncodingOp)","text":"<p>Perform unpack and extract operation on source</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.unset_encoding` attr-dict $source `:` type($source) `-&gt;` type($result)\n</code></pre> <p>Operation to convert a tensor with encoding that represents its data layout into a tensor with default layout (i.e. no encoding). For now in IREE the default layout is row-major.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>ReifyRankedShapedTypeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_3","title":"Operands:","text":"Operand Description <code>source</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_3","title":"Results:","text":"Result Description <code>result</code> ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extupper_bound_tile_size-linalgextupperboundtilesizeop","title":"<code>iree_linalg_ext.upper_bound_tile_size</code> (LinalgExt::UpperBoundTileSizeOp)","text":"<p>Returns an upper bound on tile sizes</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.upper_bound_tile_size` attr-dict $tensorType `-&gt;` type($results)\n</code></pre> <p>This returns the largest tile sizes that might result from materialization of the given encoding. This can be used outside of target-specific code, so there may be multiple targets, and this will return the maximum tile size from iterating over all of them. 
The evaluation happens in the MaterializeUpperBoundTileSize pass.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tensorType</code>::mlir::TypeAttrtype attribute of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_4","title":"Results:","text":"Result Description <code>results</code> variadic of index"},{"location":"reference/mlir-dialects/LinalgExt/#non-structured-ops","title":"Non-structured ops","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extattention-linalgextattentionop","title":"<code>iree_linalg_ext.attention</code> (LinalgExt::AttentionOp)","text":"<p>Attention operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.attention` attr-dict\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`-&gt;` type($results)^)?\n</code></pre> <p>This operator takes in 3 tensors: query(Q), key(K) and value(V) and computes the attention. For self-attention, all inputs have the same shape BxNxd where B is the of the batch dimension, N is the sequence length and d is head dimension. Typically N &gt;&gt;&gt; d. Mathematically, the attention is defined as matmul(softmax(matmul(Q, transpose(K))), V) and has shape BxNxd. Usually, this operator also performs scaling, masking and dropout, but we leave that out of the current implementation. For cross-attention, the query and output have the same shape (BxNxd), while the key and value differ in sequence length (they have shape BxLxd, where L != N). 
This operator after tiling results in a tiled result as per flash attention and results in the current <code>max</code> and <code>sum</code> statistics while processing the current tile.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>transpose_v</code>::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_4","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_5","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extfft-linalgextfftop","title":"<code>iree_linalg_ext.fft</code> (LinalgExt::FftOp)","text":"<p>Fft operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.fft` attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`:` type($results)^)?\n</code></pre> <p>Apply 1D FFT to innermost dim. This is an iterative FFT, not recursive. Thus, the bit reversal is assumed applied on the input. The op carries an input -- stage, which indicates the level of reduction loop in the algorithm. It represents the computation body. 
For more details, see \"Data reordering, bit reversal, and in-place algorithms\" section in https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm</p> <p>The size of innermost dim is expected to be a power of 2.</p> <p>It is optional to carry coefficient tensors/buffers as inputs. In this context, they will be the second and third inputs.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_5","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of any type <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_6","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extreverse-linalgextreverseop","title":"<code>iree_linalg_ext.reverse</code> (LinalgExt::ReverseOp)","text":"<p>Reverse operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.reverse` attr-dict `dimensions` `(` $dimensions `)`\n              (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              (`outs` `(` $outputs^ `:` type($outputs) `)`)?\n              (`:` type($results)^)?\n</code></pre> <p>A temporary solution for lowering reverse ops into IREE, allowing IREE to tile and distribute them. 
}</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimensions</code>::mlir::DenseIntElementsAttr64-bit signless integer elements attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_6","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_7","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extscan-linalgextscanop","title":"<code>iree_linalg_ext.scan</code> (LinalgExt::ScanOp)","text":"<p>Scan operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.scan` attr-dict\n              `dimension` `(` $dimension `)`\n              `inclusive` `(` $inclusive `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>Computes the inclusive/exclusive scan along a given dimension.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, 
<code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttr64-bit signless integer attribute <code>inclusive</code>::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_7","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_8","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extscatter-linalgextscatterop","title":"<code>iree_linalg_ext.scatter</code> (LinalgExt::ScatterOp)","text":"<p>Scatter operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.scatter` attr-dict `dimension_map` `=` $dimension_map\n              `unique_indices` `(` $unique_indices `)`\n              (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>Based on XLA operation semantics, takes two <code>inputs</code> (<code>update</code> and <code>indices</code>) and <code>outputs</code> value (<code>original</code>). The operation updates the value at the slices specified by <code>indices</code> by combining the current value with the value in <code>updates</code> using the computation specified in <code>region</code>. The <code>region</code> specifies a binary operation of signature (T, T) -&gt; T, where <code>T</code> is the element-type of <code>updates</code> (and <code>original</code>). The first argument correspond the value to be updated (i.e. from <code>updates</code>), and the second the current value (i.e. 
value from <code>original</code>).</p> <p>The <code>indices</code> is a 2D tensor/memref type. The first dim is the number of updates, and the second dim is index depth. The index depth should always be static.</p> <p>The first dim of <code>updates</code> and <code>indices</code> is identical, since they represent the number of updates.</p> <p>The rank of the <code>original</code>/<code>result</code> is at least <code>index_depth + rank(%updates) - 1</code>. The first <code>index_depth</code> indices are derived from <code>indices</code> and the shape of update value has the last rank(%original) - index_depth values match %(originals) last dimensions, with the previous dims extending from the index offsets.</p> <p>The dimension_map attribute describes which index value maps to which dimension in the destination. It cannot contain duplicate values, must have as many entries as index depth, and values must be within the rank of the destination.</p> <p>The unique_indices attribute carries the information whether all the indices are unique. If there are repeated indices, the first iteration loop will be marked as reduction.</p> <p>The shapes definition follows tensorflow operations except that it forces batch dims to be 1D. 
See more information in   https://www.tensorflow.org/api_docs/python/tf/tensor_scatter_nd_update</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension_map</code>::mlir::DenseI64ArrayAttri64 dense array attribute <code>unique_indices</code>::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_8","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of ranked tensor or memref of any type values <code>outputs</code> variadic of ranked tensor or memref of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_9","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extsort-linalgextsortop","title":"<code>iree_linalg_ext.sort</code> (LinalgExt::SortOp)","text":"<p>Sort operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.sort` attr-dict\n              `dimension` `(` $dimension `)`\n              (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>Based on XLA operation semantics, sorts the given <code>operands</code> at the given <code>dimension</code> with the given <code>comparator</code>.</p> <p>See https://www.tensorflow.org/xla/operation_semantics#sort.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, 
<code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_9","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of any type <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_10","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_exttopk-linalgexttopkop","title":"<code>iree_linalg_ext.topk</code> (LinalgExt::TopkOp)","text":"<p>Top-K operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.topk` attr-dict\n              `dimension` `(` $dimension `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              $region (`-&gt;` type($results)^)?\n</code></pre> <p>A Top-K operation for N-D tensors. Reduces the target dimension from the input size N down to K elements based on the supplied binary region.</p> <p>Accepts an N-D tensor input consisting of values and an optional N-D tensor for indices of those values (i32 type). If input indices aren't provided, the index mapping is inferred based on the k dim.  Both input values/indices tensors and output values/indices tensors must have the same shape. Top-K is computed along the target dimension (from dimension()). Returns two output tensors of values and the indices of Top-K results. 
The output dimensions must match the input save for the dimension that is reduced to K results.</p> <p>Region accepts lhs=[next N input] and rhs=[existing K output] and yields an i1. If true, the two values are swapped:   - For Top-K comparison: &gt;   - For Min-K comparison: &lt; Note: when the two values are equal, the first occurrence is always selected.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>LinalgExtOp</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_10","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_11","title":"Results:","text":"Result Description <code>results</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extyield-linalgextyieldop","title":"<code>iree_linalg_ext.yield</code> (LinalgExt::YieldOp)","text":"<p>LinalgExt yield op</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.yield` attr-dict ($operands^ `:` type($operands))?\n</code></pre> <p><code>iree_linalg_ext.yield</code> is a special terminator operation for blocks inside regions in <code>iree_linalg_ext</code> ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, 
<code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#operands_11","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/LinalgExt/#winograd-ops","title":"Winograd ops","text":""},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extwinogradinput_transform-linalgextwinogradinputtransformop","title":"<code>iree_linalg_ext.winograd.input_transform</code> (LinalgExt::WinogradInputTransformOp)","text":"<p>Winograd Input Transform operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.winograd.input_transform` attr-dict\n              `output_tile_size` `(` $output_tile_size `)`\n              `kernel_size` `(` $kernel_size `)`\n              `image_dimensions` `(` $image_dimensions `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`-&gt;` type($result)^)?\n</code></pre> <p>This operator is the first step in converting a convolution to its Winograd equivalent. Given a tile of an input image (I), this operator computes matmul(transpose(B), matmul(I, B)). The input tile is assumed to be square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. B is a constant 2-d square matrix of the same shape as the input tile I. The input to the operator is an image of shape (N, H, W, C) or (N, C, H, W) and the output is an operator of shape (m + r - 1, m + r - 1, N, H', W', C) where H' = ceil((H - m + 1)/r) and W' = ceil((W - m + 1)/r). 
The result of this operator is first collapsed and then fed to a batch matmul op.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>output_tile_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>kernel_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>image_dimensions</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_12","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_12","title":"Results:","text":"Result Description <code>result</code> variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#iree_linalg_extwinogradoutput_transform-linalgextwinogradoutputtransformop","title":"<code>iree_linalg_ext.winograd.output_transform</code> (LinalgExt::WinogradOutputTransformOp)","text":"<p>Winograd Output Transform operator</p> <p>Syntax:</p> <pre><code>operation ::= `iree_linalg_ext.winograd.output_transform` attr-dict\n              `output_tile_size` `(` $output_tile_size `)`\n              `kernel_size` `(` $kernel_size `)`\n              `image_dimensions` `(` $image_dimensions `)`\n              `ins` `(` $inputs `:` type($inputs) `)`\n              `outs` `(` $outputs `:` type($outputs) `)`\n              (`-&gt;` type($result)^)?\n</code></pre> <p>This operator is the last transform in 
converting a convolution to its Winograd equivalent. After convolution in the Winograd domain (which turns into an elementwise product for a single channel and batch matrix multiplication for many channels), this operator converts the output back into the original domain. Given a tile of the output (O) in the Winograd domain, this operator computes matmul(transpose(A), matmul(O, A)). The output tile is square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. A is a constant 2-d matrix of shape (m + r - 1) x r. The input to the operator is a tensor of shape (m + r - 1, m + r - 1, N, H', W', C) and the output is a tensor of shape (N, H, W, C) or (N, C, H, W) where H = r H' and W = r W'. This operator is followed by a tensor.extract_slice which extracts only the non-padded part of the output.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>SingleBlockImplicitTerminator&lt;::mlir::iree_compiler::IREE::LinalgExt::YieldOp&gt;</code>, <code>SingleBlock</code></p> <p>Interfaces: <code>DestinationStyleOpInterface</code>, <code>LinalgExtInterface</code>, <code>MemoryEffectOpInterface</code>, <code>ReifyRankedShapedTypeOpInterface</code>, <code>TilingInterface</code></p>"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>output_tile_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>kernel_size</code>::mlir::IntegerAttr64-bit signless integer attribute <code>image_dimensions</code>::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/LinalgExt/#operands_13","title":"Operands:","text":"Operand Description <code>inputs</code> variadic of shaped of any type values <code>outputs</code> variadic of shaped of any type values"},{"location":"reference/mlir-dialects/LinalgExt/#results_13","title":"Results:","text":"Result Description <code>result</code> variadic of ranked tensor of 
any type values"},{"location":"reference/mlir-dialects/LinalgExt/#attributes_11","title":"Attributes","text":""},{"location":"reference/mlir-dialects/LinalgExt/#encodingattr","title":"EncodingAttr","text":"<p>information to decide how to data-tile a tensor</p> <p>Syntax:</p> <pre><code>#iree_linalg_ext.encoding&lt;\n  EncodingRoleAttr,   # role\n  ArrayAttr,   # element_types\n  TypeAttr,   # original_type\n  IntegerAttr,   # matmul_narrow_M\n  IntegerAttr,   # matmul_narrow_N\n  ArrayAttr   # user_indexing_maps\n&gt;\n</code></pre> <p>This attribute describes the change in the layout for a given tensor to execute subsequent operations on the tiled layout. The encoding serves as a way to represent the change in the way the data is laid out in memory without changing the logical rank/extent of the tensor itself. When required, the encoding can be used to explicitly manifest the layout change through operations like pack/unpack.</p>"},{"location":"reference/mlir-dialects/LinalgExt/#parameters","title":"Parameters:","text":"Parameter C++ type Description role <code>EncodingRoleAttr</code> role of this tensor as an operand element_types <code>ArrayAttr</code> element types of the user's operands original_type <code>TypeAttr</code> type of the original tensor type before padding matmul_narrow_M <code>IntegerAttr</code> optional M narrow dimension size (only for contraction op user_indexing_maps) matmul_narrow_N <code>IntegerAttr</code> optional N narrow dimension size (only for contraction op user_indexing_maps) user_indexing_maps <code>ArrayAttr</code> Indexing maps of the operation using this tensor"},{"location":"reference/mlir-dialects/LinalgExt/#encodingroleattr","title":"EncodingRoleAttr","text":"<p>Describes the role of the tensor as an operand or a result of an operation.</p> <p>Syntax:</p> <pre><code>#iree_linalg_ext.role&lt;\n  ::mlir::iree_compiler::IREE::LinalgExt::EncodingRole   # value\n&gt;\n</code></pre> <p>Enum cases: * LHS (<code>LHS</code>) * RHS 
(<code>RHS</code>) * RESULT (<code>RESULT</code>)</p>"},{"location":"reference/mlir-dialects/LinalgExt/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description value <code>::mlir::iree_compiler::IREE::LinalgExt::EncodingRole</code> an enum of type EncodingRole"},{"location":"reference/mlir-dialects/Stream/","title":"Stream","text":""},{"location":"reference/mlir-dialects/Stream/#stream-dialect","title":"'stream' Dialect","text":"<p>A dialect designed to model execution partitioning and scheduling.</p> <p>The stream dialect is designed to take tensor programs and convert them to explicitly scheduled asynchronous programs. This includes placing ops on specific targets, partitioning the work between the targets, scheduling the work for concurrency, and encoding tensors into target-specific resources.</p> <pre><code>+--------+    +----------+    +-------+\n| flow.* | -&gt; | stream.* | -&gt; | hal.* |\n+--------+    +----------+    +-------+\n</code></pre> <p>This sits in-between the <code>flow</code> and <code>hal</code> dialects.</p> <ul> <li> <p><code>flow</code> models tensor programs by separating work into dispatchable   functions in order to isolate the main host program data flow and the   dense tensor compute operations.</p> </li> <li> <p><code>stream</code> models explicitly scheduled asynchronous programs by partitioning   the dispatchable work, specifying target affinities, encoding tensors into   target-specific forms, and scheduling the work to run concurrently.</p> </li> <li> <p><code>hal</code> models a low-level hardware abstraction layer used to manage   buffers and issue asynchronous work across a variety of device types. The   dialect is largely 1:1 with the IREE HAL C API.</p> </li> </ul> <p>Transforms in the dialect lower tensor values into opaque resources with the goal of ensuring no tensors survive in the IR. 
At entry <code>stream.tensor.*</code> ops are used to capture the source tensor encoding information (data type, shapes, etc) and then lowered into <code>stream.async.*</code> ops that model the asynchronous workloads on the opaque resources. The asynchronous operations are then partitioned, allocated, and scheduled for execution using the <code>stream.cmd.*</code> ops.</p> <p>It's intended that after transformation through the stream dialect the program is ready for execution on an abstract machine. At this level of representation buffers have still not been allocated and devices are not yet resolved, however the information captured in the <code>stream</code> IR allows such operations to be done trivially. To this end all ops carry the symbolic size of the resources on which they operate as well as the lifetime of the resources they are acting upon. This manifests in the usage of the <code>!stream.resource</code> type:</p> <pre><code>// Unresolved lifetime (resolved during the iree-stream-refine-usage pass):\n!stream.resource&lt;*&gt;\n// An externally managed value (passed in via the program API).\n!stream.resource&lt;external&gt;\n// A staging buffer for uploads/downloads.\n!stream.resource&lt;staging&gt;\n// A short-lived value that is used across streams.\n!stream.resource&lt;transient&gt;\n// A long-lived value that persists across streams in globals.\n!stream.resource&lt;variable&gt;\n// An immutable value that persists for the duration of the program.\n!stream.resource&lt;constant&gt;\n</code></pre> <p>Operations using resources carry the size of all operand result resources:</p> <pre><code>// %update (40 bytes) is being inserted into %target (296 bytes).\n// Can be dynamic values such as those originating from dynamic dimensions.\n%13 = stream.async.update %update, %target[%c256 to %c296] :\n    !stream.resource&lt;transient&gt;{%c40} -&gt;\n    %target as !stream.resource&lt;transient&gt;{%c296}\n</code></pre> <p>Once all <code>stream.async.*</code> work 
is moved into executable regions (such as <code>stream.async.execute</code>) <code>!stream.timepoint</code> values are used to sequence the execution. These timepoints represent some point in time where all execution up to that timepoint has completed and any results that were produced by the execution are available for use. Attempting to use the resources before their corresponding timepoint has been reached will lead to undefined behavior. The benefit of this is that after timepoints are established in the IR it's possible to induce aliasing of resources without breaking execution correctness.</p> <ul> <li>'stream' Dialect<ul> <li>Operations<ul> <li>Async control flow ops<ul> <li>stream.async.call (Stream::AsyncCallOp)</li> <li>stream.async.concurrent (Stream::AsyncConcurrentOp)</li> <li>stream.async.execute (Stream::AsyncExecuteOp)</li> <li>stream.async.func (Stream::AsyncFuncOp)</li> </ul> </li> <li>Channel ops<ul> <li>stream.channel.count (Stream::ChannelCountOp)</li> <li>stream.channel.create (Stream::ChannelCreateOp)</li> <li>stream.channel.rank (Stream::ChannelRankOp)</li> <li>stream.channel.split (Stream::ChannelSplitOp)</li> </ul> </li> <li>Executable ops<ul> <li>stream.binding.subspan (Stream::BindingSubspanOp)</li> <li>stream.dispatch.workgroup.count (Stream::DispatchWorkgroupCountOp)</li> <li>stream.dispatch.workgroup.id (Stream::DispatchWorkgroupIDOp)</li> <li>stream.dispatch.workgroup.size (Stream::DispatchWorkgroupSizeOp)</li> <li>stream.executable.end (Stream::ExecutableEndOp)</li> <li>stream.executable.export (Stream::ExecutableExportOp)</li> <li>stream.executable (Stream::ExecutableOp)</li> </ul> </li> <li>Execution context ops<ul> <li>stream.context.resolve (Stream::ContextResolveOp)</li> </ul> </li> <li>Explicit command ops<ul> <li>stream.cmd.call (Stream::CmdCallOp)</li> <li>stream.cmd.collective (Stream::CmdCollectiveOp)</li> <li>stream.cmd.concurrent (Stream::CmdConcurrentOp)</li> <li>stream.cmd.copy (Stream::CmdCopyOp)</li> 
<li>stream.cmd.discard (Stream::CmdDiscardOp)</li> <li>stream.cmd.dispatch (Stream::CmdDispatchOp)</li> <li>stream.cmd.execute (Stream::CmdExecuteOp)</li> <li>stream.cmd.fill (Stream::CmdFillOp)</li> <li>stream.cmd.flush (Stream::CmdFlushOp)</li> <li>stream.cmd.func (Stream::CmdFuncOp)</li> <li>stream.cmd.invalidate (Stream::CmdInvalidateOp)</li> <li>stream.cmd.serial (Stream::CmdSerialOp)</li> </ul> </li> <li>File ops<ul> <li>stream.file.constant (Stream::FileConstantOp)</li> <li>stream.file.read (Stream::FileReadOp)</li> <li>stream.file.write (Stream::FileWriteOp)</li> </ul> </li> <li>Miscellaneous ops<ul> <li>stream.return (Stream::ReturnOp)</li> <li>stream.yield (Stream::YieldOp)</li> </ul> </li> <li>Pseudo Ops<ul> <li>stream.tensor.export (Stream::TensorExportOp)</li> <li>stream.tensor.import (Stream::TensorImportOp)</li> </ul> </li> <li>Resource ops<ul> <li>stream.resource.alloc (Stream::ResourceAllocOp)</li> <li>stream.resource.alloca (Stream::ResourceAllocaOp)</li> <li>stream.resource.constants (Stream::ResourceConstantsOp)</li> <li>stream.resource.dealloca (Stream::ResourceDeallocaOp)</li> <li>stream.resource.load (Stream::ResourceLoadOp)</li> <li>stream.resource.pack (Stream::ResourcePackOp)</li> <li>stream.resource.size (Stream::ResourceSizeOp)</li> <li>stream.resource.store (Stream::ResourceStoreOp)</li> <li>stream.resource.subview (Stream::ResourceSubviewOp)</li> <li>stream.resource.try_map (Stream::ResourceTryMapOp)</li> </ul> </li> <li>Resource parameter I/O ops<ul> <li>stream.parameter.gather (Stream::ParameterGatherOp)</li> <li>stream.parameter.load (Stream::ParameterLoadOp)</li> <li>stream.parameter.read (Stream::ParameterReadOp)</li> <li>stream.parameter.scatter (Stream::ParameterScatterOp)</li> <li>stream.parameter.write (Stream::ParameterWriteOp)</li> </ul> </li> <li>Resource transfer ops<ul> <li>stream.async.alloca (Stream::AsyncAllocaOp)</li> <li>stream.async.clone (Stream::AsyncCloneOp)</li> <li>stream.async.collective 
(Stream::AsyncCollectiveOp)</li> <li>stream.async.constant (Stream::AsyncConstantOp)</li> <li>stream.async.copy (Stream::AsyncCopyOp)</li> <li>stream.async.dispatch (Stream::AsyncDispatchOp)</li> <li>stream.async.fill (Stream::AsyncFillOp)</li> <li>stream.async.load (Stream::AsyncLoadOp)</li> <li>stream.async.slice (Stream::AsyncSliceOp)</li> <li>stream.async.splat (Stream::AsyncSplatOp)</li> <li>stream.async.store (Stream::AsyncStoreOp)</li> <li>stream.async.transfer (Stream::AsyncTransferOp)</li> <li>stream.async.update (Stream::AsyncUpdateOp)</li> </ul> </li> <li>Synchronization ops<ul> <li>stream.timepoint.await (Stream::TimepointAwaitOp)</li> <li>stream.timepoint.barrier (Stream::TimepointBarrierOp)</li> <li>stream.timepoint.chain_external (Stream::TimepointChainExternalOp)</li> <li>stream.timepoint.export (Stream::TimepointExportOp)</li> <li>stream.timepoint.immediate (Stream::TimepointImmediateOp)</li> <li>stream.timepoint.import (Stream::TimepointImportOp)</li> <li>stream.timepoint.join (Stream::TimepointJoinOp)</li> </ul> </li> <li>Tensor ops<ul> <li>stream.tensor.clone (Stream::TensorCloneOp)</li> <li>stream.tensor.constant (Stream::TensorConstantOp)</li> <li>stream.tensor.empty (Stream::TensorEmptyOp)</li> <li>stream.tensor.fill (Stream::TensorFillOp)</li> <li>stream.tensor.load (Stream::TensorLoadOp)</li> <li>stream.tensor.sizeof (Stream::TensorSizeOfOp)</li> <li>stream.tensor.slice (Stream::TensorSliceOp)</li> <li>stream.tensor.splat (Stream::TensorSplatOp)</li> <li>stream.tensor.store (Stream::TensorStoreOp)</li> <li>stream.tensor.trace (Stream::TensorTraceOp)</li> <li>stream.tensor.update (Stream::TensorUpdateOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>CollectiveAttr</li> <li>NamedParameterAttr</li> <li>PartitioningConfigAttr</li> <li>ResourceConfigAttr</li> <li>TimepointAttr</li> </ul> </li> <li>Type constraints<ul> <li>constant resource</li> <li>external resource</li> <li>staging resource</li> <li>transient resource</li> 
<li>resource</li> <li>variable resource</li> </ul> </li> <li>Types<ul> <li>BindingType</li> <li>ChannelType</li> <li>FileType</li> <li>ResourceType</li> <li>TimepointType</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/Stream/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Stream/#async-control-flow-ops","title":"Async control flow ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasynccall-streamasynccallop","title":"<code>stream.async.call</code> (Stream::AsyncCallOp)","text":"<p>Calls a streamable external host function</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.call` (`on` `(` $affinity^ `)`)?\n              $callee ``\n              custom&lt;DispatchOperands&gt;($resource_operands,\n              $resource_operand_offsets,\n              $resource_operand_ends,\n              $resource_operand_lengths) attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($resource_operands),\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands)\n</code></pre> <p>Calls a function taking/returning resource values with stream semantics. Asynchronous calls must have no side-effects.</p> <p>Note that returned resources must have their sizes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the size (avoid at all costs!) a separate func.call can be used outside of the stream to do so. 
If sizes are unknowable until the operation is performed it should be made as a normal asynchronous host call with 'coarse-fences' instead.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>CallOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type or any type <code>resource_operand_sizes</code> variadic of index <code>resource_operand_offsets</code> variadic of index <code>resource_operand_ends</code> variadic of index <code>resource_operand_lengths</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamasyncconcurrent-streamasyncconcurrentop","title":"<code>stream.async.concurrent</code> (Stream::AsyncConcurrentOp)","text":"<p>Executes all ops concurrently</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.concurrent` (`on` `(` 
$affinity^ `)`)?\n              `with` ``\n              custom&lt;ResourceRegion&gt;($resource_operands,\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands, $body)\n              attr-dict-with-keyword\n</code></pre> <p>Represents a wave of work scheduled concurrently (each op executing at the same time). All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution.</p> <p>Waves can be nested to create a DAG. For example, take the following graph: <pre><code>                  |\n        v---------+---------v\n+-------|-------+   +-------|-------+\n|    v--+--v    |   |    v--+--v    |\n| +----+ +----+ |   | +----+ +----+ |\n| | %a | | %b | |   | | %c | | %d | |\n| +----+ +----+ |   | +----+ +----+ |\n|    +--v--+    |   |    +--v--+    |\n+-------|-------+   +-------|-------+\n        +---------v---------+\n                  |\n</code></pre></p> <p>Represented with nested waves: <pre><code>  %0 = stream.async.concurrent with(%arg) -&gt; ... {\n    %1 = stream.async.concurrent with(%arg as %arg0) -&gt; ... {\n      %a = ...\n      %b = ...\n      stream.yield %a, %b\n    }\n    %2 = stream.async.concurrent with(%arg as %arg1) -&gt; ... 
{\n      %c = ...\n      %d = ...\n      stream.yield %c, %d\n    }\n    stream.yield %1, %2\n  }\n</code></pre></p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>HasParent&lt;IREE::Stream::AsyncExecuteOp, IREE::Stream::AsyncConcurrentOp&gt;</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ClosureOpInterface</code>, <code>RegionBranchOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_1","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_1","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncexecute-streamasyncexecuteop","title":"<code>stream.async.execute</code> (Stream::AsyncExecuteOp)","text":"<p>Executes a dependency-aware sequence of streamable ops</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.execute` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              
`with` ``\n              custom&lt;ResourceRegion&gt;($resource_operands,\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands, $body)\n              `=` `` `&gt;` type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested <code>stream.async.concurrent</code> ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ClosureOpInterface</code>, <code>RegionBranchOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_2","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index 
<code>result_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_2","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamasyncfunc-streamasyncfuncop","title":"<code>stream.async.func</code> (Stream::AsyncFuncOp)","text":"<p>Streamable function declaration</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.func` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              ``\n              custom&lt;ShapedFunctionSignature&gt;($function_type,\n              $tied_operands,\n              $arg_attrs,\n              $res_attrs)\n              attr-dict-with-keyword\n              ($body^)?\n</code></pre> <p>Declares a function that can be called as an asynchronous streaming operation via <code>stream.async.call</code>. 
Today only external functions are allowed.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#channel-ops","title":"Channel ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamchannelcount-streamchannelcountop","title":"<code>stream.channel.count</code> (Stream::ChannelCountOp)","text":"<p>Returns the total number of participants in the group</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.count` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the total participant count in the collective communicator group.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_3","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_3","title":"Results:","text":"Result Description <code>result</code> 
index"},{"location":"reference/mlir-dialects/Stream/#streamchannelcreate-streamchannelcreateop","title":"<code>stream.channel.create</code> (Stream::ChannelCreateOp)","text":"<p>Creates a new channel for collective communication</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.create` (`on` `(` $affinity^ `)`)?\n              (`id` `(` $id^ `)`)?\n              (`group` `(` $group^ `)`)?\n              (`rank` `(` $rank^ `)`)?\n              (`count` `(` $count^ `)`)?\n              `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new channel with the given rank associated with the specified affinity. Collective operations using this channel must only be submitted on compatible affinities.</p> <p>The group and ID are optional and may be null. The rank and count can be omitted to indicate a default inherited from the environment or device configuration at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>group</code>::mlir::StringAttrstring attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_4","title":"Operands:","text":"Operand Description <code>id</code> a reference counted byte buffer <code>rank</code> index <code>count</code> index"},{"location":"reference/mlir-dialects/Stream/#results_4","title":"Results:","text":"Result Description <code>result</code> a collective communication 
channel"},{"location":"reference/mlir-dialects/Stream/#streamchannelrank-streamchannelrankop","title":"<code>stream.channel.rank</code> (Stream::ChannelRankOp)","text":"<p>Returns the rank of the local participant in the group</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.rank` $channel `:` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the rank the channel represents as a participant in a collective group in <code>[0, count)</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_5","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_5","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamchannelsplit-streamchannelsplitop","title":"<code>stream.channel.split</code> (Stream::ChannelSplitOp)","text":"<p>Splits a collective communication channel</p> <p>Syntax:</p> <pre><code>operation ::= `stream.channel.split` $channel `,` $color `,` $key\n              `:` type($channel) `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. 
A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_6","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel <code>color</code> index <code>key</code> index"},{"location":"reference/mlir-dialects/Stream/#results_6","title":"Results:","text":"Result Description <code>result</code> a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#executable-ops","title":"Executable ops","text":""},{"location":"reference/mlir-dialects/Stream/#streambindingsubspan-streambindingsubspanop","title":"<code>stream.binding.subspan</code> (Stream::BindingSubspanOp)","text":"<p>Returns an alias to a subspan of interface binding data</p> <p>Syntax:</p> <pre><code>operation ::= `stream.binding.subspan` $binding `` `[` $byte_offset `]`\n              attr-dict `:` type($binding) `-&gt;` type($result) (`{` $dynamic_dims^ `}`)?\n</code></pre> <p>Returns a subview to a tensor or memref-like type from a binding. 
The same binding may have multiple subviews at different byte offsets.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_7","title":"Operands:","text":"Operand Description <code>binding</code> a managed resource binding into an executable scope <code>byte_offset</code> index <code>dynamic_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_7","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Stream/#streamdispatchworkgroupcount-streamdispatchworkgroupcountop","title":"<code>stream.dispatch.workgroup.count</code> (Stream::DispatchWorkgroupCountOp)","text":"<p>Returns the total workgroup count of the grid</p> <p>Syntax:</p> <pre><code>operation ::= `stream.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The total number of workgroups along each dimension in the dispatch grid.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>NumWorkgroups</code> SPIR-V built-in and the <code>gridDim</code> CUDA built-in variable.</p> <pre><code>%x = stream.dispatch.workgroup.count[0] : index\n%y = stream.dispatch.workgroup.count[1] : index\n%z = stream.dispatch.workgroup.count[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Stream/#results_8","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamdispatchworkgroupid-streamdispatchworkgroupidop","title":"<code>stream.dispatch.workgroup.id</code> (Stream::DispatchWorkgroupIDOp)","text":"<p>Returns the index of the current workgroup in the grid</p> <p>Syntax:</p> <pre><code>operation ::= `stream.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The global workgroup ID of the current workgroup in the range of <code>[0, stream.dispatch.workgroup.count)</code> along each dimension.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupId</code> SPIR-V built-in and the <code>blockIdx</code> CUDA built-in variable.</p> <pre><code>%x = stream.dispatch.workgroup.id[0] : index\n%y = stream.dispatch.workgroup.id[1] : index\n%z = stream.dispatch.workgroup.id[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Stream/#results_9","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamdispatchworkgroupsize-streamdispatchworkgroupsizeop","title":"<code>stream.dispatch.workgroup.size</code> (Stream::DispatchWorkgroupSizeOp)","text":"<p>Returns the size of each workgroup in invocations</p> <p>Syntax:</p> <pre><code>operation ::= `stream.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n</code></pre> <p>The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.</p> <p>Workgroup sizes are not determined at the stream dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.</p> <p>Represented as a 3D grid classically written as XYZ. 
Corresponds to the <code>WorkgroupSize</code> SPIR-V built-in and the <code>blockDim</code> CUDA built-in variable.</p> <pre><code>%x = stream.dispatch.workgroup.size[0] : index\n%y = stream.dispatch.workgroup.size[1] : index\n%z = stream.dispatch.workgroup.size[2] : index\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>dimension</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Stream/#results_10","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamexecutableend-streamexecutableendop","title":"<code>stream.executable.end</code> (Stream::ExecutableEndOp)","text":"<p>Terminator pseudo-op for the executable op</p> <p>Syntax:</p> <pre><code>operation ::= `stream.executable.end` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::Stream::ExecutableOp&gt;</code>, <code>Terminator</code></p>"},{"location":"reference/mlir-dialects/Stream/#streamexecutableexport-streamexecutableexportop","title":"<code>stream.executable.export</code> (Stream::ExecutableExportOp)","text":"<p>Defines an executable entry point for dispatch operations</p> <p>Syntax:</p> <pre><code>operation ::= `stream.executable.export` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              custom&lt;SymbolAlias&gt;($sym_name, $function_ref)\n              custom&lt;WorkgroupCountRegion&gt;($workgroup_count)\n              attr-dict-with-keyword\n</code></pre> <p>Specifies an exported function with an externally-visible alias. 
Multiple exports can reference the same internal function.</p> <p>Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.</p> <p>Traits: <code>HasParent&lt;IREE::Stream::ExecutableOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_ref</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Stream/#streamexecutable-streamexecutableop","title":"<code>stream.executable</code> (Stream::ExecutableOp)","text":"<p>Generic executable module</p> <p>Syntax:</p> <pre><code>operation ::= `stream.executable` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>An executable module containing one or more public functions. 
The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::ExecutableEndOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code>, <code>Util_ObjectLike</code></p> <p>Interfaces: <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Stream/#execution-context-ops","title":"Execution context ops","text":"<p>Operations for interacting with the execution context that stream operations execute within.</p>"},{"location":"reference/mlir-dialects/Stream/#streamcontextresolve-streamcontextresolveop","title":"<code>stream.context.resolve</code> (Stream::ContextResolveOp)","text":"<p>Resolves low-level context resources based on type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.context.resolve` (`on` `(` $affinity^ `)`)?\n              attr-dict `:` type($results)\n</code></pre> <p>WIP; allows for accessing the implementation details of lower-level dialects such as the HAL. 
This will likely be reworked in the future to either live inside other dialects, use some op interface instead of having a dedicated op here, or remove the op entirely and make resolution happen explicitly.</p> <p>Examples: <pre><code>// Returns a HAL device.\n= stream.context.resolve on(#something) : !hal.device\n// Returns a HAL device and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.device, i64\n// Returns a HAL allocator and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.allocator, i64\n</code></pre></p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#results_11","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#explicit-command-ops","title":"Explicit command ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamcmdcall-streamcmdcallop","title":"<code>stream.cmd.call</code> (Stream::CmdCallOp)","text":"<p>Calls a streamable external host function</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.call` $callee ``\n              custom&lt;CmdCallOperands&gt;($resource_operands,\n              $resource_operand_offsets,\n              $resource_operand_lengths,\n              $resource_operand_accesses) attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($resource_operands),\n              type($resource_operands),\n              $resource_operand_sizes,\n              type($results),\n              $result_sizes,\n      
        $tied_operands)\n</code></pre> <p>Calls a function operating on resource values with stream semantics. Asynchronous calls must have no side-effects.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>CallOpInterface</code>, <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>SymbolUserOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>resource_operand_accesses</code>::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_8","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of index or integer or floating-point or complex-type or resource or external resource or transient resource or variable resource or constant resource or any type <code>resource_operand_sizes</code> variadic of index <code>resource_operand_offsets</code> variadic of index <code>resource_operand_lengths</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_12","title":"Results:","text":"Result Description <code>results</code> variadic of index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamcmdcollective-streamcmdcollectiveop","title":"<code>stream.cmd.collective</code> (Stream::CmdCollectiveOp)","text":"<p>Dispatches a collective operation</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.collective` `` $op `` `[` $element_count `]`\n              `channel` `(` $channel `)`\n              (`param` `(` $param^ `:` type($param) `)`)? 
`{`\n              custom&lt;DispatchResources&gt;($resources, type($resources), $resource_sizes,\n              $resource_offsets, $resource_lengths,\n              $resource_accesses)\n              `\\n` `}`\n              attr-dict-with-keyword\n</code></pre> <p>Dispatches a collective operation specified against the device. If grouped with other collectives in a <code>stream.cmd.concurrent</code> region the collective operations may fuse and execute more efficiently.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>op</code>::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification <code>resource_accesses</code>::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_9","title":"Operands:","text":"Operand Description <code>channel</code> a collective communication channel <code>element_count</code> index <code>param</code> 32-bit signless integer <code>resources</code> variadic of resource or external resource or transient resource or variable resource or constant resource <code>resource_sizes</code> variadic of index <code>resource_offsets</code> variadic of index <code>resource_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdconcurrent-streamcmdconcurrentop","title":"<code>stream.cmd.concurrent</code> (Stream::CmdConcurrentOp)","text":"<p>Executes all ops concurrently</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.concurrent` $body\n              attr-dict-with-keyword\n</code></pre> <p>Represents a wave of work scheduled concurrently (each op executing at the same time).</p> <p>Waves can be nested to create a DAG. 
For example, take the following graph: <pre><code>                  |\n        v---------+---------v\n+-------|-------+   +-------|-------+\n|    v--+--v    |   |    v--+--v    |\n| +----+ +----+ |   | +----+ +----+ |\n| | @a | | @b | |   | | @c | | @d | |\n| +----+ +----+ |   | +----+ +----+ |\n|    +--v--+    |   |    +--v--+    |\n+-------|-------+   +-------|-------+\n        +---------v---------+\n                  |\n</code></pre></p> <p>Represented with nested waves: <pre><code>  stream.cmd.concurrent {\n    stream.cmd.concurrent {\n      stream.cmd.dispatch @a\n      stream.cmd.dispatch @b\n    }\n    stream.cmd.concurrent {\n      stream.cmd.dispatch @c\n      stream.cmd.dispatch @d\n    }\n  }\n</code></pre></p> <p>Traits: <code>HasParent&lt;IREE::Stream::CmdExecuteOp, IREE::Stream::CmdSerialOp, IREE::Stream::CmdConcurrentOp&gt;</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>RegionBranchOpInterface</code>, <code>Stream_StreamableOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#streamcmdcopy-streamcmdcopyop","title":"<code>stream.cmd.copy</code> (Stream::CmdCopyOp)","text":"<p>Copies a subview of a stream resource to another</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.copy` $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Copies a subview of a resource into a subview of another. 
As with memcpy this does not support overlapping updates into the same resource.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_10","title":"Operands:","text":"Operand Description <code>source</code> any stream-compatible type <code>source_size</code> index <code>source_offset</code> index <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmddiscard-streamcmddiscardop","title":"<code>stream.cmd.discard</code> (Stream::CmdDiscardOp)","text":"<p>Discards a subview of a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.discard` $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Discards a subview of a resource, indicating that after this command the specified contents are no longer needed. This can be used to trim memory or invalidate caches.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_11","title":"Operands:","text":"Operand Description <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmddispatch-streamcmddispatchop","title":"<code>stream.cmd.dispatch</code> (Stream::CmdDispatchOp)","text":"<p>Dispatches a parallelized grid of work</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.dispatch` custom&lt;DispatchEntryPoints&gt;($entry_points)\n              (`[` $workload^ `]`)? 
``\n              (`(` $uniform_operands^ `:` type($uniform_operands) `)`)? `{`\n              custom&lt;DispatchResources&gt;($resources, type($resources), $resource_sizes,\n              $resource_offsets, $resource_lengths,\n              $resource_accesses)\n              `\\n` `}`\n              attr-dict-with-keyword\n</code></pre> <p>Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>SymbolUserOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_points</code>::mlir::ArrayAttrsymbol ref array attribute <code>resource_accesses</code>::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_12","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>uniform_operands</code> variadic of index or integer or floating-point or complex-type <code>resources</code> variadic of resource or external resource or transient resource or variable resource or constant resource <code>resource_sizes</code> variadic of index <code>resource_offsets</code> variadic of index <code>resource_lengths</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdexecute-streamcmdexecuteop","title":"<code>stream.cmd.execute</code> (Stream::CmdExecuteOp)","text":"<p>Executes a dependency-aware sequence of streamable ops</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.execute` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `with` ``\n              
custom&lt;ExplicitResourceRegion&gt;($resource_operands,\n              type($resource_operands), $resource_operand_sizes,\n              $body)\n              `=` `` `&gt;` type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested <code>stream.cmd.concurrent</code> ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>ClosureOpInterface</code>, <code>InferTypeOpInterface</code>, <code>RegionBranchOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_13","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_13","title":"Results:","text":"Result Description 
<code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamcmdfill-streamcmdfillop","title":"<code>stream.cmd.fill</code> (Stream::CmdFillOp)","text":"<p>Fills a subview of a stream resource with a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.fill` $value `,`\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($value) `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Splats a value into a subview of the given stream resource and returns the resource with the update applied.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_14","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index <code>value</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#streamcmdflush-streamcmdflushop","title":"<code>stream.cmd.flush</code> (Stream::CmdFlushOp)","text":"<p>Flushes a subview of a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.flush` (`to` `(` $source_affinity^ `)`)?\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Transfers a resource to an external target. 
The resource memory is made available to the target and can be made visible there using <code>stream.cmd.invalidate</code>.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_15","title":"Operands:","text":"Operand Description <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmdfunc-streamcmdfuncop","title":"<code>stream.cmd.func</code> (Stream::CmdFuncOp)","text":"<p>Streamable function declaration</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.func` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name ``\n              custom&lt;DispatchFunctionSignature&gt;($function_type,\n              $arg_attrs,\n              $res_attrs)\n              attr-dict-with-keyword\n              ($body^)?\n</code></pre> <p>Declares a function that can be called as an asynchronous streaming operation via <code>stream.cmd.call</code>. 
Today only external functions are allowed.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#streamcmdinvalidate-streamcmdinvalidateop","title":"<code>stream.cmd.invalidate</code> (Stream::CmdInvalidateOp)","text":"<p>Invalidates a subview of a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.invalidate` (`from` `(` $source_affinity^ `)`)?\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Transfers a resource from an external source into the current target. 
The resource memory is assumed to have been made available at the source via <code>stream.cmd.flush</code>.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>Stream_StreamableOp</code>, <code>Stream_SubviewEffectOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_16","title":"Operands:","text":"Operand Description <code>target</code> any stream-compatible type <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index"},{"location":"reference/mlir-dialects/Stream/#streamcmdserial-streamcmdserialop","title":"<code>stream.cmd.serial</code> (Stream::CmdSerialOp)","text":"<p>Executes all ops serially (in-order)</p> <p>Syntax:</p> <pre><code>operation ::= `stream.cmd.serial` $body\n              attr-dict-with-keyword\n</code></pre> <p>Represents a sequence of work scheduled serially (each op executing one after the other).</p> <p>Regions can be nested to create a DAG. 
For example, take the following graph: <pre><code>                  |\n        v---------+-----v\n+-------|-------+   +---|----+\n|    v--+--v    |   |   v    |\n| +----+ +----+ |   | +----+ |\n| | @a | | @b | |   | | @c | |\n| +----+ +----+ |   | +----+ |\n|    |     |    |   |   |    |\n|    |     |    |   | +-v--+ |\n|    |     |    |   | | @d | |\n|    |     |    |   | +----+ |\n|    +--v--+    |   |   |    |\n+-------|-------+   +---|----+\n        +---------v-----+\n                  |\n</code></pre></p> <p>Represented with nested regions: <pre><code>  stream.cmd.concurrent {\n    stream.cmd.concurrent {\n      stream.cmd.dispatch @a\n      stream.cmd.dispatch @b\n    }\n    stream.cmd.serial {\n      stream.cmd.dispatch @c\n      stream.cmd.dispatch @d\n    }\n  }\n</code></pre></p> <p>Traits: <code>HasParent&lt;IREE::Stream::CmdExecuteOp, IREE::Stream::CmdSerialOp, IREE::Stream::CmdConcurrentOp&gt;</code>, <code>RecursiveMemoryEffects</code>, <code>SingleBlockImplicitTerminator&lt;IREE::Stream::YieldOp&gt;</code>, <code>SingleBlock</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>RegionBranchOpInterface</code>, <code>Stream_StreamableOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#file-ops","title":"File ops","text":"<p>File ops.</p>"},{"location":"reference/mlir-dialects/Stream/#streamfileconstant-streamfileconstantop","title":"<code>stream.file.constant</code> (Stream::FileConstantOp)","text":"<p>Creates a file backed by the provided constant host memory</p> <p>Syntax:</p> <pre><code>operation ::= `stream.file.constant` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `for` $source_length `]` `:`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Synchronously wraps a host heap buffer into a stream-accessible file handle. 
Changing the source buffer after definition has undefined behavior.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_17","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>source_length</code> index"},{"location":"reference/mlir-dialects/Stream/#results_14","title":"Results:","text":"Result Description <code>result</code> a file handle used for I/O operations"},{"location":"reference/mlir-dialects/Stream/#streamfileread-streamfilereadop","title":"<code>stream.file.read</code> (Stream::FileReadOp)","text":"<p>Reads a segment of a file into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.file.read` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`):(`:`)?\n              $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `-&gt;`\n              type($target) `` `{` $target_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads a 
segment of a file into a resource.</p> <p>Some implementations can stream directly from the source file into device-local memory and file ops should be preferred to manually staging memory through host buffers.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_18","title":"Operands:","text":"Operand Description <code>source</code> a file handle used for I/O operations <code>source_offset</code> 64-bit signless integer <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>length</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_15","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamfilewrite-streamfilewriteop","title":"<code>stream.file.write</code> (Stream::FileWriteOp)","text":"<p>Writes a segment of a file from a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.file.write` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`):(`:`)?\n              $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($target)\n              `=` `` `&gt;`\n              type($result_timepoint)\n              
attr-dict-with-keyword\n</code></pre> <p>Asynchronously writes a segment of a resource into a file. The file range must be valid within the file as this operation cannot grow the underlying file storage.</p> <p>Some implementations can stream directly from device-local memory into the target file and file ops should be preferred to manually staging memory through host buffers.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_19","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>target</code> a file handle used for I/O operations <code>target_offset</code> 64-bit signless integer <code>length</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_16","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#miscellaneous-ops","title":"Miscellaneous ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamreturn-streamreturnop","title":"<code>stream.return</code> (Stream::ReturnOp)","text":"<p>Returns results from a region</p> <p>Syntax:</p> <pre><code>operation ::= `stream.return` attr-dict\n              $operands `:` type($operands)\n</code></pre> <p>The values returned are copied by-value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, 
<code>HasParent&lt;IREE::Stream::ExecutableExportOp&gt;</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_20","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamyield-streamyieldop","title":"<code>stream.yield</code> (Stream::YieldOp)","text":"<p>Yields stream values from an execution region</p> <p>Syntax:</p> <pre><code>operation ::= `stream.yield` attr-dict\n              ($resource_operands^ `:`\n              custom&lt;ShapedTypeList&gt;(type($resource_operands),\n              $resource_operand_sizes))?\n</code></pre> <p>The values returned represent the asynchronous value at the point in time the SSA value is defined (or tied).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>HasParent&lt;IREE::Stream::AsyncExecuteOp, IREE::Stream::AsyncConcurrentOp, IREE::Stream::CmdExecuteOp, IREE::Stream::CmdSerialOp, IREE::Stream::CmdConcurrentOp&gt;</code>, <code>SameVariadicOperandSize</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_21","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#pseudo-ops","title":"Pseudo Ops","text":"<p>Pseudo ops for conversion 
support.</p>"},{"location":"reference/mlir-dialects/Stream/#streamtensorexport-streamtensorexportop","title":"<code>stream.tensor.export</code> (Stream::TensorExportOp)","text":"<p>Conversion placeholder for stream-&gt;other type conversion</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.export` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion to a higher-level dialect type such as <code>tensor</code> that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_22","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_17","title":"Results:","text":"Result Description <code>result</code> any 
type"},{"location":"reference/mlir-dialects/Stream/#streamtensorimport-streamtensorimportop","title":"<code>stream.tensor.import</code> (Stream::TensorImportOp)","text":"<p>Conversion placeholder for other-&gt;stream type conversion</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.import` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              type($source)\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion from a higher-level dialect type such as <code>tensor</code> that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_23","title":"Operands:","text":"Operand Description <code>source</code> any type <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_18","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource or staging 
resource"},{"location":"reference/mlir-dialects/Stream/#resource-ops","title":"Resource ops","text":"<p>Generic resource ops.</p>"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloc-streamresourceallocop","title":"<code>stream.resource.alloc</code> (Stream::ResourceAllocOp)","text":"<p>Allocates a persistent resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.alloc` (`on` `(` $affinity^ `)`)?\n              (`uninitialized` $uninitialized^)?\n              attr-dict `:`\n              type($result) `{` $storage_size `}`\n</code></pre> <p>Allocates a persistent value (one that is long-lived and possibly external to the program) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use <code>discard</code> access.</p> <p>Uninitialized allocations will have undefined contents and must only be used when all bytes are discarded prior to any reads. Runtimes decide what \"undefined contents\" means and here it only indicates that execution will be correct even if the memory starts with non-zero values.</p> <p>If multiple values are allocated from the same operation it implies that they have matching lifetimes. When lowering to execution environments the separate allocations may be fused into one or more slab allocations in order to reduce overheads. 
How many allocations can be fused is based on the size of the individual resources and the target constraints (how large any single buffer may be, etc).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>uninitialized</code>::mlir::UnitAttrunit attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_24","title":"Operands:","text":"Operand Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_19","title":"Results:","text":"Result Description <code>result</code> any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloca-streamresourceallocaop","title":"<code>stream.resource.alloca</code> (Stream::ResourceAllocaOp)","text":"<p>Allocates a transient value with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.alloca` `uninitialized`\n              (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`):(`:`)?\n              attr-dict\n              type($result) `{` $storage_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n</code></pre> <p>Allocates a transient value (one that is short-lived and local to the current computation) with undefined contents. 
Consumers of the allocated result must assume nothing of the contents and use <code>discard</code> access.</p> <p>The resource returned is not valid for use until the timepoint is reached; execution using this resource must await on the timepoint.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_25","title":"Operands:","text":"Operand Description <code>storage_size</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_20","title":"Results:","text":"Result Description <code>result</code> any stream-compatible type <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceconstants-streamresourceconstantsop","title":"<code>stream.resource.constants</code> (Stream::ResourceConstantsOp)","text":"<p>Asynchronously uploads or maps constant values</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.constants` (`on` `(` $affinity^ `)`)?\n              attr-dict `:`\n              custom&lt;ConstantValueList&gt;(type($results),\n              $result_sizes,\n              $values)\n              `\\n` ` ` ` ` `=` `` `&gt;` type($result_timepoint)\n</code></pre> <p>Represents an upload of constant resources that may be packed, suballocated, and mapped depending on the final lowering 
target.</p> <p>In runtime environments where memory is shared between host and device this turns into a mapping operation that avoids additional memory allocation and copies. When memory cannot be shared an asynchronous stream will be created to allocate and copy all of the constant values.</p> <p>Though this op returns a unique resource for each constant value it's expected that almost all end up aliasing into the same storage. The exact packing and number of storage resources that are needed are not known until lowering to a particular backend, though, so they are separate here for proper usage tracking.</p> <p>Both constant and variable resources can be produced; a constant is immutable while a variable will be treated as a constant-value initializer for a mutable resource. By modeling these together it's not required that variable initializers first be allocated, copied to the target, and then copied into the variable storage if the target is capable of doing a direct upload or mapping.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameVariadicResultSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>values</code>::mlir::ArrayAttrconstant value array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_26","title":"Operands:","text":"Operand Description <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_21","title":"Results:","text":"Result Description <code>results</code> variadic of constant resource or variable resource 
<code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourcedealloca-streamresourcedeallocaop","title":"<code>stream.resource.dealloca</code> (Stream::ResourceDeallocaOp)","text":"<p>Frees a transient value when available</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.dealloca` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              $operand `:` type($operand) `{` $operand_size `}`\n              `=` `` `&gt;` type($result_timepoint)\n              attr-dict\n</code></pre> <p>Deallocates a transient value (one that is short-lived and local to the current computation) previously allocated using <code>stream.resource.alloca</code>.</p> <p>The resource is considered live and valid until the provided timepoint is reached and the memory is only made available for future requests after the result timepoint is reached.</p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_27","title":"Operands:","text":"Operand Description <code>operand</code> any stream-compatible type <code>operand_size</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_22","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution 
availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceload-streamresourceloadop","title":"<code>stream.resource.load</code> (Stream::ResourceLoadOp)","text":"<p>Loads a value from a staging resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.load` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element(s) at the given offset in the staging resource. The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be transferred.</p> <p>Interfaces: <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_28","title":"Operands:","text":"Operand Description <code>source</code> staging resource <code>source_size</code> index <code>source_offset</code> index"},{"location":"reference/mlir-dialects/Stream/#results_23","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcepack-streamresourcepackop","title":"<code>stream.resource.pack</code> (Stream::ResourcePackOp)","text":"<p>Packs variable-sized slices into a single slab</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.pack` (`on` `(` $affinity^ `)`)?\n              (`offset` `(` $offset^ `)`)?\n              `slices` `(` `{`\n              custom&lt;PackSliceRanges&gt;($lifetime_intervals,\n              $dynamic_slice_sizes,\n              type($packed_offsets))\n              `}` `)`\n              `:` type($total_length)\n              attr-dict-with-keyword\n</code></pre> <p>Performs a greedy packing of one or more sized slices with specified lifetimes and returns their relative offsets in an aliased linear space.</p> <p>Slices are <code>[start, end] = 
%slice_byte_size</code>, where the start and end values define an inclusive lifetime range and the size is the total number of bytes required to be live for that range.</p> <pre><code>// Computes the total length required for the packed values and the offsets\n// of the 3 slices requested relative to the base of the packed memory:\n%total_length, %offset_0, %offset_1, %offset_2 =\n    stream.resource.pack\n        // Each slice gets one result offset:\n        slices({\n          // 3 slices where A and B overlap and will get unique offsets\n          // while B and C do not overlap and are allowed to alias.\n          [0, 10] = %size_0,  // A =&gt; %offset_0\n          [3,  8] = %size_1,  // B =&gt; %offset_1\n          [9, 10] = %size_2,  // C =&gt; %offset_2\n          ...\n        }) : index\n</code></pre> <p>The lifetime start and end points (inclusive) are only used for relative comparisons and may originate with any meaning (op order in block, epoch, phase of the moon, etc). The packing algorithm uses the intervals to determine slice liveness and when aliasing is safe.</p> <p>The size of each slice may either be a constant or runtime-computed dynamic value. Constant slices can achieve more dense packing than the dynamic values and CSE/canonicalization should be applied to ensure that as many of the dynamic values as possible are equivalent.</p> <p>The total length required to pack all slices is returned and can be used to acquire storage. The individual slice offsets are 0-based and as such, if used directly as buffer offsets, may need additional offsetting. 
This can either be applied via the optional <code>offset</code> operand or slicing of the underlying allocation buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>lifetime_intervals</code>::mlir::ArrayAttrindex array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_29","title":"Operands:","text":"Operand Description <code>offset</code> index <code>dynamic_slice_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_24","title":"Results:","text":"Result Description <code>total_length</code> index <code>packed_offsets</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamresourcesize-streamresourcesizeop","title":"<code>stream.resource.size</code> (Stream::ResourceSizeOp)","text":"<p>Returns the size of the resource storage in bytes</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.size` (`on` `(` $affinity^ `)`)?\n              $operand\n              attr-dict `:` type($operand)\n</code></pre> <p>Returns a possibly runtime-dynamic byte size of the resource backing storage. 
This may differ from the logical storage size of a value based on the alignment requirements of the target as well as encoding of higher level values such as sparse tensor formats.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_30","title":"Operands:","text":"Operand Description <code>operand</code> any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#results_25","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Stream/#streamresourcestore-streamresourcestoreop","title":"<code>stream.resource.store</code> (Stream::ResourceStoreOp)","text":"<p>Stores a value into a staging resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.store` $value `,`\n              $target `[` $target_offset `]` `:`\n              type($value)\n              `-&gt;`\n              type($target) `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be acquired.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_31","title":"Operands:","text":"Operand 
Description <code>target</code> staging resource <code>target_size</code> index <code>target_offset</code> index <code>value</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcesubview-streamresourcesubviewop","title":"<code>stream.resource.subview</code> (Stream::ResourceSubviewOp)","text":"<p>Slices out a cloned subview of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.subview` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Aliases a byte subrange of a resource.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>StreamableOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code>, <code>Util_SubrangeOp</code>, <code>ViewLikeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_32","title":"Operands:","text":"Operand Description <code>source</code> any stream-compatible type <code>source_size</code> index <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_26","title":"Results:","text":"Result Description <code>result</code> any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcetry_map-streamresourcetrymapop","title":"<code>stream.resource.try_map</code> (Stream::ResourceTryMapOp)","text":"<p>Maps read-only memory into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.resource.try_map` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `]` `:`\n              type($source)\n              `-&gt;`\n              
type($did_map) `,` type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Synchronously maps a host heap buffer into a stream-accessible resource with the requested lifetime. If the given source cannot be mapped the <code>did_map</code> result will be 0 and users must find another route into memory (such as file I/O). The resulting resource is not coherent with the source and behavior is undefined if the underlying contents change.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_33","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_27","title":"Results:","text":"Result Description <code>did_map</code> 1-bit signless integer <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#resource-parameter-io-ops","title":"Resource parameter I/O ops","text":"<p>Resource parameter I/O ops.</p>"},{"location":"reference/mlir-dialects/Stream/#streamparametergather-streamparametergatherop","title":"<code>stream.parameter.gather</code> (Stream::ParameterGatherOp)","text":"<p>Gathers multiple 
resources from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.gather` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `{`\n              custom&lt;ParameterGatherOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              $target, type($target), $target_size, $target_offsets, $target_lengths)\n              `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously gathers one or more resources into a single target stream resource. This is equivalent to one <code>stream.parameter.read</code> per parameter but allows implementations that can batch operations to do so without additional timeline overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_34","title":"Operands:","text":"Operand Description <code>source_offsets</code> variadic of 64-bit signless integer <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offsets</code> variadic of index <code>target_lengths</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_28","title":"Results:","text":"Result Description 
<code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterload-streamparameterloadop","title":"<code>stream.parameter.load</code> (Stream::ParameterLoadOp)","text":"<p>Reads one or more resources from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.load` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `{`\n              custom&lt;ParameterLoadOperations&gt;(\n              $source_scope, $source_keys, $source_offsets,\n              type($results), $result_sizes)\n              `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads one or more resources from an external parameter provider and returns the resulting stream resources. Depending on the resource type this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if <code>stream.resource.alloca</code> and <code>stream.parameter.read</code> had been used per parameter.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_keys</code>::mlir::ArrayAttrstring array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_35","title":"Operands:","text":"Operand 
Description <code>source_offsets</code> variadic of 64-bit signless integer <code>result_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_29","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterread-streamparameterreadop","title":"<code>stream.parameter.read</code> (Stream::ParameterReadOp)","text":"<p>Reads a resource from a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.read` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              custom&lt;ParameterReference&gt;($source_scope, $source_key)\n              `` `[` $source_offset `]` `-&gt;`\n              $target `[` $target_offset `for` $target_length `]` `:`\n              type($target) `` `{` $target_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously reads a resource from an external parameter provider into the provided target resource range.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_scope</code>::mlir::StringAttrstring attribute <code>source_key</code>::mlir::StringAttrstring attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_36","title":"Operands:","text":"Operand 
Description <code>source_offset</code> 64-bit signless integer <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_length</code> index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_30","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterscatter-streamparameterscatterop","title":"<code>stream.parameter.scatter</code> (Stream::ParameterScatterOp)","text":"<p>Scatters multiple resources to a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.scatter` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              `{`\n              custom&lt;ParameterScatterOperations&gt;(\n              $source, type($source), $source_size, $source_offsets, $source_lengths,\n              $target_scope, $target_keys, $target_offsets)\n              `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously scatters one or more resources from a single source resource into one or more parameters. 
This is equivalent to one <code>stream.parameter.write</code> per parameter but allows implementations that can batch operations to do so without additional overhead.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_scope</code>::mlir::StringAttrstring attribute <code>target_keys</code>::mlir::ArrayAttrstring array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_37","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offsets</code> variadic of index <code>source_lengths</code> variadic of index <code>target_offsets</code> variadic of 64-bit signless integer <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_31","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterwrite-streamparameterwriteop","title":"<code>stream.parameter.write</code> (Stream::ParameterWriteOp)","text":"<p>Writes a resource to a parameter scope</p> <p>Syntax:</p> <pre><code>operation ::= `stream.parameter.write` (`on` `(` $affinity^ `)`)?\n              (`await` `(` $await_timepoint^ `)` `=` `` `&gt;`)?\n              $source `[` $source_offset `for` $source_length `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              custom&lt;ParameterReference&gt;($target_scope, 
$target_key)\n              `` `[` $target_offset `]`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Asynchronously writes a resource to an external parameter provider from the provided source resource range.</p> <p>Traits: <code>Stream_CmdPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_TimelineOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_scope</code>::mlir::StringAttrstring attribute <code>target_key</code>::mlir::StringAttrstring attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_38","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_length</code> index <code>target_offset</code> 64-bit signless integer <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_32","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#resource-transfer-ops","title":"Resource transfer ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasyncalloca-streamasyncallocaop","title":"<code>stream.async.alloca</code> (Stream::AsyncAllocaOp)","text":"<p>Allocates a transient value with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.alloca` (`on` `(` $affinity^ `)`)?\n              attr-dict `:` type($result) `{` $storage_size `}`\n</code></pre> <p>Allocates a transient value (one that 
is short-lived and local to the current computation) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use <code>discard</code> access.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AffinityOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_39","title":"Operands:","text":"Operand Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_33","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncclone-streamasynccloneop","title":"<code>stream.async.clone</code> (Stream::AsyncCloneOp)","text":"<p>Clones the contents of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.clone` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. 
Acts as a copy-on-write operation.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_40","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_34","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccollective-streamasynccollectiveop","title":"<code>stream.async.collective</code> (Stream::AsyncCollectiveOp)","text":"<p>Performs a collective operation</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.collective` `` $op `` `[` $element_count `]`\n              (`on` `(` $affinity^ `)`)?\n              `channel` `(` $channel `)`\n              custom&lt;CollectiveParam&gt;(ref($op), $param) ``\n              $source `[` $source_offset `to` $source_end `for` $source_length `]` `,`\n              $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>TODO: document different usage. 
For now this should be considered a prototype and that modeling of collective operations may change in the future to better ensure in-place operations (where send/recv is a subset of recv/send). We may have dedicated operations for the send and recv verbs as they have sequencing implications - or we could add optional sequencing to this base op.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>op</code>::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_41","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>target_length</code> index <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_end</code> index <code>source_length</code> index <code>element_count</code> index <code>channel</code> a collective communication channel <code>param</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_35","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant 
resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncconstant-streamasyncconstantop","title":"<code>stream.async.constant</code> (Stream::AsyncConstantOp)","text":"<p>Defines a constant resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.constant` (`on` `(` $affinity^ `)`)?\n              `:`\n              type($result) `` `{` $result_size `}`\n              `=`\n              $value\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new resource with the given constant value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::Attributeany attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_42","title":"Operands:","text":"Operand Description <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_36","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccopy-streamasynccopyop","title":"<code>stream.async.copy</code> (Stream::AsyncCopyOp)","text":"<p>Copies a subview of a stream resource to another</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.copy` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `to` $source_end `]` `,`\n              $target `[` $target_offset `to` 
$target_end `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Copies a subview of a resource into a subview of another. As with memcpy this does not support overlapping updates into the same resource. Unlike <code>stream.async.update</code> copy sources cannot be allocated in-place.</p> <p>Equivalent to a stream.async.slice + stream.async.update.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_43","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_end</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Stream/#results_37","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncdispatch-streamasyncdispatchop","title":"<code>stream.async.dispatch</code> (Stream::AsyncDispatchOp)","text":"<p>Dispatches a parallelized grid of 
work</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.dispatch` (`on` `(` $affinity^ `)`)?\n              custom&lt;DispatchEntryPoints&gt;($entry_points)\n              (`[` $workload^ `]`)? ``\n              custom&lt;DispatchOperands&gt;($resource_operands,\n              $resource_operand_offsets,\n              $resource_operand_ends,\n              $resource_operand_lengths) attr-dict `:`\n              custom&lt;ShapedFunctionType&gt;(ref($resource_operands),\n              type($resource_operands), $resource_operand_sizes,\n              type($results), $result_sizes,\n              $tied_operands)\n</code></pre> <p>Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>SymbolUserOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>entry_points</code>::mlir::ArrayAttrsymbol ref array attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_44","title":"Operands:","text":"Operand Description <code>workload</code> variadic of index <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type <code>resource_operand_sizes</code> variadic of index <code>resource_operand_offsets</code> variadic of index <code>resource_operand_ends</code> 
variadic of index <code>resource_operand_lengths</code> variadic of index <code>result_sizes</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_38","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncfill-streamasyncfillop","title":"<code>stream.async.fill</code> (Stream::AsyncFillOp)","text":"<p>Fills a subview of a stream resource with a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.fill` (`on` `(` $affinity^ `)`)?\n              $value `,`\n              $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n              type($value) `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Splats a value into a subview of the given stream resource and returns the resource with the update applied.</p> <p>Equivalent to a stream.async.splat + stream.async.update.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_45","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>target_length</code> index <code>value</code> 8-bit signless integer or 16-bit 
signless integer or 32-bit signless integer or 64-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_39","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncload-streamasyncloadop","title":"<code>stream.async.load</code> (Stream::AsyncLoadOp)","text":"<p>Loads a value from a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.load` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the resource.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_46","title":"Operands:","text":"Operand Description <code>source</code> staging resource <code>source_size</code> index <code>source_offset</code> index"},{"location":"reference/mlir-dialects/Stream/#results_40","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamasyncslice-streamasyncsliceop","title":"<code>stream.async.slice</code> (Stream::AsyncSliceOp)","text":"<p>Slices out a cloned subview of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.slice` (`on` `(` $affinity^ `)`)?\n              $source `[` $source_offset `to` $source_end `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              
attr-dict-with-keyword\n</code></pre> <p>Slices a subrange of a stream resource based on a byte range. Acts as a copy-on-write operation.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_47","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_size</code> index <code>source_offset</code> index <code>source_end</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_41","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncsplat-streamasyncsplatop","title":"<code>stream.async.splat</code> (Stream::AsyncSplatOp)","text":"<p>Splats a value into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.splat` (`on` `(` $affinity^ `)`)?\n              $value `:` type($value) `-&gt;` type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a new resource with the given primitive value splatted out to fill the entire contents.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, 
<code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_48","title":"Operands:","text":"Operand Description <code>value</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_42","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncstore-streamasyncstoreop","title":"<code>stream.async.store</code> (Stream::AsyncStoreOp)","text":"<p>Stores a value into a resource</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.store` $value `,`\n              $target `[` $target_offset `]` `:`\n              type($value)\n              `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a resource with the element at the given offset set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_49","title":"Operands:","text":"Operand Description <code>target</code> staging resource <code>target_size</code> index <code>target_offset</code> index <code>value</code> index or integer or 
floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_43","title":"Results:","text":"Result Description <code>result</code> staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasynctransfer-streamasynctransferop","title":"<code>stream.async.transfer</code> (Stream::AsyncTransferOp)","text":"<p>Transfers a resource from one location/state to another</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.transfer` (`from` `(` $source_affinity^ `)`)?\n              $source `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              (`to` `(` $result_affinity^ `)`)?\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Transfers a resource between different states (such as a <code>staging</code> lifetime to a <code>local</code> lifetime) or different affinities. This is roughly equivalent to a cast but may have special semantics when later lowered to one or more devices with discrete memory spaces or pools.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity <code>result_affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_50","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>source_size</code> index <code>result_size</code> 
index"},{"location":"reference/mlir-dialects/Stream/#results_44","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncupdate-streamasyncupdateop","title":"<code>stream.async.update</code> (Stream::AsyncUpdateOp)","text":"<p>Updates a slice of a subview of a resource in-place</p> <p>Syntax:</p> <pre><code>operation ::= `stream.async.update` (`on` `(` $affinity^ `)`)?\n              $update `,`\n              $target `[` $target_offset `to` $target_end `]` `:`\n              type($update) `` `{` $update_size `}` `-&gt;`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Copies a value into a resource based on a byte range. The returned value is the entire updated target value. Updates can be turned into placement allocations and avoid copies.</p> <p>Traits: <code>Stream_AsyncPhaseOp</code></p> <p>Interfaces: <code>AsyncAccessOpInterface</code>, <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_51","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_size</code> index <code>target_offset</code> index <code>target_end</code> index <code>update</code> resource or external resource or transient resource or variable resource or constant resource <code>update_size</code> 
index"},{"location":"reference/mlir-dialects/Stream/#results_45","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#synchronization-ops","title":"Synchronization ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtimepointawait-streamtimepointawaitop","title":"<code>stream.timepoint.await</code> (Stream::TimepointAwaitOp)","text":"<p>Awaits a timepoint before returning a set of resources</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.await` (`on` `(` $affinity^ `)`)?\n              $await_timepoint `=` `` `&gt;`\n              $resource_operands `:`\n              custom&lt;ShapedTypeList&gt;(type($resource_operands),\n              type($results), $resource_operand_sizes)\n              attr-dict-with-keyword\n</code></pre> <p>After asynchronous execution scheduling resources may exist in different states at different points in the execution timeline. This op enables resolving the version of a resource after a particular point in the timeline. 
As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_52","title":"Operands:","text":"Operand Description <code>resource_operands</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_operand_sizes</code> variadic of index <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_46","title":"Results:","text":"Result Description <code>results</code> variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtimepointbarrier-streamtimepointbarrierop","title":"<code>stream.timepoint.barrier</code> (Stream::TimepointBarrierOp)","text":"<p>Returns a timepoint indicating when a resource is available</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.barrier` (`on` `(` $affinity^ `)`)?\n              $resource `:` type($resource) `` `{` $resource_size `}`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>After asynchronous execution scheduling resources may 
exist in different states at different points in the execution timeline. This op enables identifying when the version of a resource after a particular point in the timeline is available. As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_TimelineOp</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_53","title":"Operands:","text":"Operand Description <code>resource</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>resource_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_47","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource or staging resource <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointchain_external-streamtimepointchainexternalop","title":"<code>stream.timepoint.chain_external</code> (Stream::TimepointChainExternalOp)","text":"<p>Exports a timepoint to an external dialect type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.chain_external` (`on` `(` $affinity^ `)`)?\n              $await_timepoint\n              `=` `` `&gt;`\n              `(` $external_values 
`:` type($external_values) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion to an external dialect type such as <code>hal.fence</code> that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Interfaces: <code>Stream_AffinityOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_48","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_54","title":"Operands:","text":"Operand Description <code>await_timepoint</code> a timepoint indicating execution availability <code>external_values</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointexport-streamtimepointexportop","title":"<code>stream.timepoint.export</code> (Stream::TimepointExportOp)","text":"<p>Exports a timepoint to an external dialect type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.export` (`on` `(` $affinity^ `)`)?\n              $await_timepoint\n              `=` `` `&gt;`\n              `(` type($results) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion to an external dialect type such as <code>hal.fence</code> that is resolved during lowering into the stream dialect. 
This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_49","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_55","title":"Operands:","text":"Operand Description <code>await_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_48","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimmediate-streamtimepointimmediateop","title":"<code>stream.timepoint.immediate</code> (Stream::TimepointImmediateOp)","text":"<p>Returns an immediately-available timepoint</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.immediate` attr-dict\n              `=` `` `&gt;` type($result_timepoint)\n</code></pre> <p>Timepoints indicate a point in the execution timeline and this op can be used to get a placeholder representing the start of the timeline. Any waits on the returned timepoint will resolve immediately. 
This generally folds away but can be useful if needing to initialize globals or branch args.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#results_49","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimport-streamtimepointimportop","title":"<code>stream.timepoint.import</code> (Stream::TimepointImportOp)","text":"<p>Imports a timepoint from an external dialect type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.import` (`on` `(` $affinity^ `)`)?\n              $operands `:` `(` type($operands) `)`\n              `=` `` `&gt;`\n              type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Defines a conversion from an external dialect type such as <code>hal.semaphore</code> that is resolved during lowering into the stream dialect. 
This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_50","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_56","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Stream/#results_50","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointjoin-streamtimepointjoinop","title":"<code>stream.timepoint.join</code> (Stream::TimepointJoinOp)","text":"<p>Joins one or more timepoints into the max of all of them</p> <p>Syntax:</p> <pre><code>operation ::= `stream.timepoint.join` `max` `(` $await_timepoints `)` `=` `` `&gt;` type($result_timepoint)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a timepoint that indicates that all of the input timepoints have been reached.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_TimelineOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#operands_57","title":"Operands:","text":"Operand Description <code>await_timepoints</code> variadic of a timepoint indicating execution 
availability"},{"location":"reference/mlir-dialects/Stream/#results_51","title":"Results:","text":"Result Description <code>result_timepoint</code> a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtensorclone-streamtensorcloneop","title":"<code>stream.tensor.clone</code> (Stream::TensorCloneOp)","text":"<p>Clones the contents of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.clone` (`on` `(` $affinity^ `)`)?\n              $source `:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. 
Acts as a copy-on-write operation.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_51","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_58","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_52","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorconstant-streamtensorconstantop","title":"<code>stream.tensor.constant</code> (Stream::TensorConstantOp)","text":"<p>Defines a constant tensor value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.constant` (`on` `(` $affinity^ `)`)?\n              `:`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result)\n              `=`\n              $value\n              attr-dict-with-keyword\n</code></pre> <p>Returns a typed resource initialized 
to the given constant value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_ShapeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_52","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::Attributeany attribute <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_59","title":"Operands:","text":"Operand Description <code>result_encoding_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_53","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorempty-streamtensoremptyop","title":"<code>stream.tensor.empty</code> (Stream::TensorEmptyOp)","text":"<p>Defines an empty tensor value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.empty` (`on` `(` $affinity^ `)`)?\n              `:`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a typed resource initialized with no contents. This still carries shape metadata and may encode to a non-empty resource such as in cases where the empty representation still has data (e.g. sparse tensors). 
Subsequent writes must populate any ranges of the tensor that are later read.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_53","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_60","title":"Operands:","text":"Operand Description <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_54","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorfill-streamtensorfillop","title":"<code>stream.tensor.fill</code> (Stream::TensorFillOp)","text":"<p>Fills a subview of a stream resource with a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.fill` (`on` `(` $affinity^ `)`)?\n              $value `,` $target `[` $start_indices `for` $lengths `]` `:`\n              type($value)\n              `-&gt;`\n              $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n              `in`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Splats a value into a subview of the given stream resource and returns the resource with the update applied.</p> <p>Equivalent to a 
stream.tensor.splat + stream.tensor.update.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_54","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_61","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_encoding_dims</code> variadic of index <code>target_size</code> index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>value</code> index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#results_55","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorload-streamtensorloadop","title":"<code>stream.tensor.load</code> (Stream::TensorLoadOp)","text":"<p>Loads a value from a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.load` $source (`[` $indices^ `]`)? 
`:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the element at the given location from within the tensor.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_55","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_62","title":"Operands:","text":"Operand Description <code>source</code> staging resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index <code>indices</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_56","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamtensorsizeof-streamtensorsizeofop","title":"<code>stream.tensor.sizeof</code> (Stream::TensorSizeOfOp)","text":"<p>Calculates the storage size of a given high-level type</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.sizeof` (`on` `(` $affinity^ `)`)?\n              $encoding (`{` $encoding_dims^ `}`)?\n              attr-dict `:` type($storage_size)\n</code></pre> <p>Target-dependent storage size calculation using a high-level annotated type. While within the stream dialect the storage size of a value is left as a placeholder using this op. 
The requisite target-specific parameters for expanding the size calculation are only available after affinities have been assigned.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_56","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_63","title":"Operands:","text":"Operand Description <code>encoding_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_57","title":"Results:","text":"Result Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Stream/#streamtensorslice-streamtensorsliceop","title":"<code>stream.tensor.slice</code> (Stream::TensorSliceOp)","text":"<p>Slices out a cloned subview of a value</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.slice` (`on` `(` $affinity^ `)`)?\n              $source `[` $start_indices `for` $lengths `]` `:`\n              $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n              `in`\n              type($source) `` `{` $source_size `}`\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Slices a subrange of a stream resource based on a tensor encoding. 
Acts as a copy-on-write operation.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_57","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>source_encoding</code>::mlir::TypeAttrany type attribute <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_64","title":"Operands:","text":"Operand Description <code>source</code> resource or external resource or transient resource or variable resource or constant resource <code>source_encoding_dims</code> variadic of index <code>source_size</code> index <code>start_indices</code> variadic of index <code>lengths</code> variadic of index <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_58","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorsplat-streamtensorsplatop","title":"<code>stream.tensor.splat</code> (Stream::TensorSplatOp)","text":"<p>Splats a value into a shaped tensor</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.splat` (`on` `(` $affinity^ `)`)?\n              $value\n              `:` type($value)\n              `-&gt;`\n              $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n              `in`\n              type($result) `` 
`{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a typed resource initialized to the given primitive value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Stream_AffinityOp</code>, <code>StreamableOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_58","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>result_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_65","title":"Operands:","text":"Operand Description <code>value</code> index or integer or floating-point or complex-type <code>result_encoding_dims</code> variadic of index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_59","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorstore-streamtensorstoreop","title":"<code>stream.tensor.store</code> (Stream::TensorStoreOp)","text":"<p>Stores a value into a tensor element</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.store` $value `,`\n              $target (`[` $indices^ `]`)? 
`:`\n              type($value)\n              `-&gt;`\n              $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n              `in`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Returns a tensor with the element at the given index set to the given value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_59","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_66","title":"Operands:","text":"Operand Description <code>target</code> staging resource <code>target_encoding_dims</code> variadic of index <code>target_size</code> index <code>indices</code> variadic of index <code>value</code> index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_60","title":"Results:","text":"Result Description <code>result</code> staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtensortrace-streamtensortraceop","title":"<code>stream.tensor.trace</code> (Stream::TensorTraceOp)","text":"<p>Traces one or more tensor values at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.trace` $key `=` `[`\n              custom&lt;EncodedResourceOperands&gt;(\n              $resources, type($resources), $resource_sizes,\n              $resource_encodings, $resource_encoding_dims)\n              `]` attr-dict-with-keyword\n</code></pre> 
<p>Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.</p> <p>Traits: <code>AttrSizedOperandSegments</code></p> <p>Interfaces: <code>ShapeAwareOpInterface</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_60","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>key</code>::mlir::StringAttrstring attribute <code>resource_encodings</code>::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_67","title":"Operands:","text":"Operand Description <code>resources</code> variadic of staging resource <code>resource_sizes</code> variadic of index <code>resource_encoding_dims</code> variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamtensorupdate-streamtensorupdateop","title":"<code>stream.tensor.update</code> (Stream::TensorUpdateOp)","text":"<p>Updates a slice of a subview of a resource in-place</p> <p>Syntax:</p> <pre><code>operation ::= `stream.tensor.update` (`on` `(` $affinity^ `)`)?\n              $update `,` $target `[` $start_indices `]` `:`\n              $update_encoding (`` `{` $update_encoding_dims^ `}`)?\n              `in`\n              type($update) `` `{` $update_size `}`\n              `-&gt;`\n              $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n              `in`\n              custom&lt;ShapedTiedResult&gt;(type($target), $target_size)\n              attr-dict-with-keyword\n</code></pre> <p>Copies a value into a resource based on tensor encodings. 
The returned value is the entire updated target value.</p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Stream_TensorPhaseOp</code></p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>Stream_AffinityOp</code>, <code>Stream_StreamableOp</code>, <code>TiedOpInterface</code>, <code>Util_ShapeAwareOp</code>, <code>Util_SizeAwareOp</code></p>"},{"location":"reference/mlir-dialects/Stream/#attributes_61","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>target_encoding</code>::mlir::TypeAttrany type attribute <code>update_encoding</code>::mlir::TypeAttrany type attribute <code>affinity</code>::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_68","title":"Operands:","text":"Operand Description <code>target</code> resource or external resource or transient resource or variable resource or constant resource <code>target_encoding_dims</code> variadic of index <code>target_size</code> index <code>start_indices</code> variadic of index <code>update</code> resource or external resource or transient resource or variable resource or constant resource <code>update_encoding_dims</code> variadic of index <code>update_size</code> index"},{"location":"reference/mlir-dialects/Stream/#results_61","title":"Results:","text":"Result Description <code>result</code> resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#attributes_62","title":"Attributes","text":""},{"location":"reference/mlir-dialects/Stream/#collectiveattr","title":"CollectiveAttr","text":"<p>collective operation and specification</p> <p>Syntax:</p> <pre><code>#stream.collective&lt;\n  CollectiveKind,   # kind\n  std::optional&lt;CollectiveReductionOp&gt;,   # reduction\n  CollectiveElementType   # element_type\n&gt;\n</code></pre> <p>Specifies the collective operation to perform and any mode bits 
required.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters","title":"Parameters:","text":"Parameter C++ type Description kind <code>CollectiveKind</code> reduction <code>std::optional&lt;CollectiveReductionOp&gt;</code> element_type <code>CollectiveElementType</code>"},{"location":"reference/mlir-dialects/Stream/#namedparameterattr","title":"NamedParameterAttr","text":"<p>named parameter referenced by an optional scope and key</p> <p>Syntax:</p> <pre><code>#stream.parameter.named&lt;\n  ::mlir::Type,   # type\n  StringAttr,   # scope\n  StringAttr,   # key\n  DictionaryAttr   # config\n&gt;\n</code></pre> <p>Specifies an externally-defined parameter that can be referenced by an optional scope defining a set of parameters and a key uniquely identifying the parameter within its scope.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code> scope <code>StringAttr</code> key <code>StringAttr</code> config <code>DictionaryAttr</code>"},{"location":"reference/mlir-dialects/Stream/#partitioningconfigattr","title":"PartitioningConfigAttr","text":"<p>defines partitioning configuration</p> <p>Configures the partitioning algorithm to use and its configuration. Partitioning is useful to adjust when scheduling behavior of targets is radically different - such as single-threaded vs. multi-threaded CPUs or bespoke ML accelerators vs. general purpose GPUs. This mechanism controls the amount of concurrency, parallelism, memory consumption, and latency.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description favor <code>IREE::Stream::FavorAttr</code>"},{"location":"reference/mlir-dialects/Stream/#resourceconfigattr","title":"ResourceConfigAttr","text":"<p>defines resource constraints configuration</p> <p>Defines resource storage constraints. 
These allow for packing and layout algorithms to ensure they are producing usable results on target devices.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description maxAllocationSize <code>int64_t</code> minBufferOffsetAlignment <code>int64_t</code> maxBufferRange <code>int64_t</code> minBufferRangeAlignment <code>int64_t</code> indexBits <code>int64_t</code> aliasMutableBindings <code>bool</code> memoryModel <code>IREE::Stream::MemoryModel</code>"},{"location":"reference/mlir-dialects/Stream/#timepointattr","title":"TimepointAttr","text":"<p>an immediately-resolved timepoint</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code>"},{"location":"reference/mlir-dialects/Stream/#type-constraints","title":"Type constraints","text":""},{"location":"reference/mlir-dialects/Stream/#constant-resource","title":"constant resource","text":"<p>Stream constants are immutable values that are available for the lifetime of the program once initialized.</p>"},{"location":"reference/mlir-dialects/Stream/#external-resource","title":"external resource","text":"<p>Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#staging-resource","title":"staging resource","text":"<p>Stream upload/download staging resource. 
These are used outside of streams and then transferred to other stream resources such as variables or transients for use inside of streams. Dispatches and several other operations cannot directly operate on these resources.</p>"},{"location":"reference/mlir-dialects/Stream/#transient-resource","title":"transient resource","text":"<p>Stream transients represent asynchronously-available and sequenced values that have a short lifetime - often only passed between stream executions. It is expected that transient values are not stored in global state and have minimal lifetime as they may be heavily pooled or suballocated.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#resource","title":"resource","text":"<p>A stream resource that has not yet had its lifetime calculated.</p>"},{"location":"reference/mlir-dialects/Stream/#variable-resource","title":"variable resource","text":"<p>Stream variables represent asynchronously-available and sequenced values that have a long lifetime relative to the work being performed on them. These variables are often stored in global state and may live for the entire duration of the program.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/Stream/#bindingtype","title":"BindingType","text":"<p>a managed resource binding into an executable scope</p> <p>Syntax: <code>!stream.binding</code></p> <p>A resource binding available within an executable dispatch function. 
The bindings map 1:1 with the resources bound during dispatch operations.</p>"},{"location":"reference/mlir-dialects/Stream/#channeltype","title":"ChannelType","text":"<p>a collective communication channel</p> <p>Syntax: <code>!stream.channel</code></p> <p>Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.</p> <p>In programs that model SPMD behavior internally channels can be created or provided by hosting applications. For example, the program could expose a <code>@set_channels(!util.list&lt;!stream.channel&gt;)</code> method that stores the channels in globals for use throughout the program allowing for application-controlled channel configuration.</p>"},{"location":"reference/mlir-dialects/Stream/#filetype","title":"FileType","text":"<p>a file handle used for I/O operations</p> <p>Syntax: <code>!stream.file</code></p> <p>A file handle that can be asynchronously read and written into/from stream resources.</p>"},{"location":"reference/mlir-dialects/Stream/#resourcetype","title":"ResourceType","text":"<p>a managed resource</p> <p>Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.</p> <p>Stream values are not usable directly outside of a stream execution or transfer operation. 
If the contents of the value are needed they must first be transferred via <code>stream.transfer</code> - which may incur a copy.</p>"},{"location":"reference/mlir-dialects/Stream/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description lifetime <code>IREE::Stream::Lifetime</code>"},{"location":"reference/mlir-dialects/Stream/#timepointtype","title":"TimepointType","text":"<p>a timepoint indicating execution availability</p> <p>Syntax: <code>!stream.timepoint</code></p> <p>Represents a point in the execution timeline that when resolved indicates that all of the execution prior to this timepoint has completed and the results of the execution are available for use. This includes transitive dependencies as well; if timepoint B is dependent on timepoint A then when B is available so too must be A.</p>"},{"location":"reference/mlir-dialects/Util/","title":"Util","text":""},{"location":"reference/mlir-dialects/Util/#util-dialect","title":"'util' Dialect","text":"<p>A dialect used for types common across IREE subdialects.</p> <ul> <li>'util' Dialect<ul> <li>Operations<ul> <li>Address/offset arithmetic ops<ul> <li>util.align (Util::AlignOp)</li> <li>util.sizeof (Util::SizeOfOp)</li> </ul> </li> <li>Buffer ops<ul> <li>util.buffer.alloc (Util::BufferAllocOp)</li> <li>util.buffer.compare (Util::BufferCompareOp)</li> <li>util.buffer.constant (Util::BufferConstantOp)</li> <li>util.buffer.copy (Util::BufferCopyOp)</li> <li>util.buffer.dealloc (Util::BufferDeallocOp)</li> <li>util.buffer.fill (Util::BufferFillOp)</li> <li>util.buffer.hash (Util::BufferHashOp)</li> <li>util.buffer.load (Util::BufferLoadOp)</li> <li>util.buffer.size (Util::BufferSizeOp)</li> <li>util.buffer.slice (Util::BufferSliceOp)</li> <li>util.buffer.storage (Util::BufferStorageOp)</li> <li>util.buffer.store (Util::BufferStoreOp)</li> <li>util.buffer.subspan (Util::BufferSubspanOp)</li> </ul> </li> <li>Compiler hint ops<ul> <li>util.optimization_barrier (Util::OptimizationBarrierOp)</li> 
<li>util.unfoldable_constant (Util::UnfoldableConstantOp)</li> <li>util.unreachable (Util::UnreachableOp)</li> </ul> </li> <li>Data type conversion ops<ul> <li>util.numeric.optional_narrow (Util::NumericOptionalNarrowOp)</li> </ul> </li> <li>Global ops<ul> <li>util.global.address (Util::GlobalAddressOp)</li> <li>util.global.load.indirect (Util::GlobalLoadIndirectOp)</li> <li>util.global.load (Util::GlobalLoadOp)</li> <li>util.global (Util::GlobalOp)</li> <li>util.global.store.indirect (Util::GlobalStoreIndirectOp)</li> <li>util.global.store (Util::GlobalStoreOp)</li> </ul> </li> <li>List ops<ul> <li>util.list.create (Util::ListCreateOp)</li> <li>util.list.get (Util::ListGetOp)</li> <li>util.list.resize (Util::ListResizeOp)</li> <li>util.list.set (Util::ListSetOp)</li> <li>util.list.size (Util::ListSizeOp)</li> </ul> </li> <li>Range arithmetic ops<ul> <li>util.range.extents (Util::RangeExtentsOp)</li> <li>util.range.max (Util::RangeMaxOp)</li> <li>util.range.min (Util::RangeMinOp)</li> </ul> </li> <li>Status ops<ul> <li>util.status.check_ok (Util::StatusCheckOkOp)</li> </ul> </li> <li>Structural ops<ul> <li>util.call (Util::CallOp)</li> <li>util.func (Util::FuncOp)</li> <li>util.initializer (Util::InitializerOp)</li> <li>util.return (Util::ReturnOp)</li> </ul> </li> <li>Type manipulation ops<ul> <li>util.cast (Util::CastOp)</li> <li>util.cmp.eq (Util::CmpEQOp)</li> <li>util.null (Util::NullOp)</li> </ul> </li> <li>Value utility ops<ul> <li>util.switch (Util::SwitchOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>BytePatternAttr</li> <li>ByteRangeAttr</li> <li>CompositeAttr</li> <li>InlineAlwaysAttr</li> <li>InlineNeverAttr</li> <li>UninitializedAttr</li> </ul> </li> <li>Types<ul> <li>BufferType</li> <li>ListType</li> <li>ObjectType</li> <li>PtrType</li> <li>VariantType</li> </ul> </li> </ul> </li> 
</ul>"},{"location":"reference/mlir-dialects/Util/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/Util/#addressoffset-arithmetic-ops","title":"Address/offset arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilalign-utilalignop","title":"<code>util.align</code> (Util::AlignOp)","text":"<p>Aligns up to a power-of-two alignment if required</p> <p>Syntax:</p> <pre><code>operation ::= `util.align` $value `,` $alignment attr-dict `:` type($result)\n</code></pre> <p>Aligns |value| up to the given power-of-two |alignment| if required.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands","title":"Operands:","text":"Operand Description <code>value</code> signless-integer-like <code>alignment</code> signless-integer-like"},{"location":"reference/mlir-dialects/Util/#results","title":"Results:","text":"Result Description <code>result</code> signless-integer-like"},{"location":"reference/mlir-dialects/Util/#utilsizeof-utilsizeofop","title":"<code>util.sizeof</code> (Util::SizeOfOp)","text":"<p>Returns the size in bytes of a datatype</p> <p>Syntax:</p> <pre><code>operation ::= `util.sizeof` $sizedType attr-dict-with-keyword\n</code></pre> <p>Most datatypes have a static size at all layers of the compilation stack. However, those that only have a size for certain lowering flows can be challenging. This op represents such sizes in a way that can be specialized later.</p> <p>Returns the size in bytes, rounded up to the next whole byte of the specified type. This op will fold to a constant index value for IntegerType and FloatType. 
All others are not folded.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sizedType</code>::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Util/#results_1","title":"Results:","text":"Result Description <code>size</code> index"},{"location":"reference/mlir-dialects/Util/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/Util/#utilbufferalloc-utilbufferallocop","title":"<code>util.buffer.alloc</code> (Util::BufferAllocOp)","text":"<p>Allocates a buffer with undefined contents</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.alloc` `uninitialized`\n              attr-dict\n              `:`\n              type($result) `` `{` $storage_size `}`\n</code></pre> <p>Allocates a buffer with undefined contents. 
Consumers of the allocated result must assume nothing of the contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>alignment</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_1","title":"Operands:","text":"Operand Description <code>storage_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_2","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercompare-utilbuffercompareop","title":"<code>util.buffer.compare</code> (Util::BufferCompareOp)","text":"<p>Compares a range of two buffers</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.compare` $lhs `[` $lhs_offset `]` `,`\n              $rhs `[` $rhs_offset `]` `,`\n              $length `:`\n              type($lhs) `` `{` $lhs_size `}` `,`\n              type($rhs) `` `{` $rhs_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns true if the two ranges are bitwise equivalent, somewhat like memcmp.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_2","title":"Operands:","text":"Operand Description <code>lhs</code> a reference counted byte buffer <code>lhs_size</code> index <code>lhs_offset</code> index <code>rhs</code> a reference counted byte buffer <code>rhs_size</code> index <code>rhs_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#results_3","title":"Results:","text":"Result Description <code>result</code> 1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilbufferconstant-utilbufferconstantop","title":"<code>util.buffer.constant</code> (Util::BufferConstantOp)","text":"<p>Constant host-side byte buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n</code></pre> <p>Defines a compile-time byte buffer based on the given attribute value. 
The attribute will be serialized into the canonical IREE format for the chosen host target.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::Attributebuffer-like constant attribute values <code>alignment</code>::mlir::IntegerAttrindex attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#results_4","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercopy-utilbuffercopyop","title":"<code>util.buffer.copy</code> (Util::BufferCopyOp)","text":"<p>Copies a range of bytes between buffers</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.copy` $source `[` $source_offset `]` `,`\n              $target `[` $target_offset `]` `,`\n              $length `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Copies a range of bytes as with memcpy (no overlapping).</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_3","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer 
<code>source_size</code> index <code>source_offset</code> index <code>target</code> a reference counted byte buffer <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferdealloc-utilbufferdeallocop","title":"<code>util.buffer.dealloc</code> (Util::BufferDeallocOp)","text":"<p>Deallocates a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.dealloc` $operand `:` type($operand) `{` $operand_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Hints that the buffer contents can be discarded. Buffers are reference counted and other owners may keep it live beyond the dealloc.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_4","title":"Operands:","text":"Operand Description <code>operand</code> a reference counted byte buffer <code>operand_size</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferfill-utilbufferfillop","title":"<code>util.buffer.fill</code> (Util::BufferFillOp)","text":"<p>Fills a range of bytes with a value</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.fill` $pattern `,`\n              $target `[` $target_offset `for` $length `]` `:`\n              type($pattern) `-&gt;`\n              type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Fills the contents of the buffer in the given byte range with a pattern. 
The offset and length must match the natural alignment of the pattern type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_5","title":"Operands:","text":"Operand Description <code>pattern</code> integer or floating-point or index <code>target</code> a reference counted byte buffer <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferhash-utilbufferhashop","title":"<code>util.buffer.hash</code> (Util::BufferHashOp)","text":"<p>Computes the hash of a byte range of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.hash` $source `[` $source_offset `for` $length `]`\n              `:` type($source) `` `{` $source_size `}` `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Computes the SipHash-2-4 of a value at a byte offset with the given length. 
This always uses a seed of <code>0x0001020304...0e0f</code> and produces a single 64 bit value.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_6","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#results_5","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilbufferload-utilbufferloadop","title":"<code>util.buffer.load</code> (Util::BufferLoadOp)","text":"<p>Loads a value from a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.load` $source `[` $source_offset `for` $length `]`\n              `:` type($source) `` `{` $source_size `}` `-&gt;` type($result)\n              attr-dict-with-keyword\n</code></pre> <p>Loads a value at a byte offset. 
Must be aligned to the natural size of the result type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_7","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#results_6","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#utilbuffersize-utilbuffersizeop","title":"<code>util.buffer.size</code> (Util::BufferSizeOp)","text":"<p>Returns the total buffer storage size in bytes</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.size` $operand\n              `:` type($operand)\n              attr-dict-with-keyword\n</code></pre> <p>Returns the total length of the buffer in bytes from its base offset.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_8","title":"Operands:","text":"Operand Description <code>operand</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#results_7","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferslice-utilbuffersliceop","title":"<code>util.buffer.slice</code> (Util::BufferSliceOp)","text":"<p>Clones a subregion of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= 
`util.buffer.slice` $source `[` $source_offset `]` attr-dict `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n</code></pre> <p>Returns a copy of the contents from the source buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>alignment</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_9","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_8","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbufferstorage-utilbufferstorageop","title":"<code>util.buffer.storage</code> (Util::BufferStorageOp)","text":"<p>Returns the underlying buffer storage range</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.storage` $operand\n              `:` type($operand) `` `{` $operand_size `}` `-&gt;` `(` type($result) `,` type($offset) `)`\n              attr-dict-with-keyword\n</code></pre> <p>Returns the buffer storage as a memref that must be offset and restricted to the returned range. 
The memref may be of any type and the user is responsible for ensuring that the reinterpret_cast-like behavior makes sense for the data they are accessing.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_10","title":"Operands:","text":"Operand Description <code>operand</code> a reference counted byte buffer <code>operand_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_9","title":"Results:","text":"Result Description <code>result</code> memref of any type values <code>offset</code> index"},{"location":"reference/mlir-dialects/Util/#utilbufferstore-utilbufferstoreop","title":"<code>util.buffer.store</code> (Util::BufferStoreOp)","text":"<p>Stores a value into a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.store` $source `,`\n              $target `[` $target_offset `for` $length `]`\n              `:` type($source) `-&gt;` type($target) `` `{` $target_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Stores a value at a byte offset. 
Must be aligned to the natural size of the source type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>SubrangeOperandOpInterface</code>, <code>Util_SizeAwareOp</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_11","title":"Operands:","text":"Operand Description <code>source</code> index or integer or floating-point <code>target</code> a reference counted byte buffer <code>target_size</code> index <code>target_offset</code> index <code>length</code> index"},{"location":"reference/mlir-dialects/Util/#utilbuffersubspan-utilbuffersubspanop","title":"<code>util.buffer.subspan</code> (Util::BufferSubspanOp)","text":"<p>Returns a reference to a subrange of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `util.buffer.subspan` $source `[` $source_offset `]` `:`\n              type($source) `` `{` $source_size `}` `-&gt;`\n              type($result) `` `{` $result_size `}`\n              attr-dict-with-keyword\n</code></pre> <p>Returns a logical view into an underlying source buffer. 
This induces aliasing, and multiple SSA values may allow access to the same underlying buffer storage.</p> <p>Subspans are a compiler-only concept and are propagated by an analysis pass, resulting in absolute offsets on accesses wherever the subrange would have been used.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SubrangeOperandOpInterface</code>, <code>TiedOpInterface</code>, <code>Util_SizeAwareOp</code>, <code>Util_SubrangeOp</code>, <code>ViewLikeOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_12","title":"Operands:","text":"Operand Description <code>source</code> a reference counted byte buffer <code>source_size</code> index <code>source_offset</code> index <code>result_size</code> index"},{"location":"reference/mlir-dialects/Util/#results_10","title":"Results:","text":"Result Description <code>result</code> a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/Util/#utiloptimization_barrier-utiloptimizationbarrierop","title":"<code>util.optimization_barrier</code> (Util::OptimizationBarrierOp)","text":"<p>Prevents compiler optimizations across a value.</p> <p>Syntax:</p> <pre><code>operation ::= `util.optimization_barrier` attr-dict\n              ($operands^ `:` type($operands))?\n</code></pre> <p>Wraps any operands in an unoptimizable identity to prevent its results from being folded. 
It will be dropped during the final step in compilation and has no effect at runtime.</p> <p>Traits: <code>SameOperandsAndResultType</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_13","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#results_11","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#utilunfoldable_constant-utilunfoldableconstantop","title":"<code>util.unfoldable_constant</code> (Util::UnfoldableConstantOp)","text":"<p>A constant that cannot be folded by the compiler.</p> <p>Similar to a std.constant, but is declared as having a side effect and has no folder. This is really just syntactic sugar as it is canonicalized to a std.constant wrapped in a util.optimization_barrier.</p>"},{"location":"reference/mlir-dialects/Util/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/Util/#results_12","title":"Results:","text":"Result Description \u00abunnamed\u00bb any type"},{"location":"reference/mlir-dialects/Util/#utilunreachable-utilunreachableop","title":"<code>util.unreachable</code> (Util::UnreachableOp)","text":"<p>Unreachable assertion op</p> <p>Syntax:</p> <pre><code>operation ::= `util.unreachable` $message attr-dict\n</code></pre> <p>Signals to the compiler that the parent block should not be reachable. 
This may be converted into a runtime assertion, though ideally they are stripped during translation.</p> <pre><code>^bb0:\n  %true = arith.constant true\n  cond_br %true, ^bb2, ^bb1\n^bb1:\n  // Indicates that this branch should never be taken.\n  util.unreachable \"shouldn't be here\"\n^bb2:\n  ...\n</code></pre> <p>Traits: <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#data-type-conversion-ops","title":"Data type conversion ops","text":""},{"location":"reference/mlir-dialects/Util/#utilnumericoptional_narrow-utilnumericoptionalnarrowop","title":"<code>util.numeric.optional_narrow</code> (Util::NumericOptionalNarrowOp)","text":"<p>Memorializes an optional numeric narrowing that is valid</p> <p>Syntax:</p> <pre><code>operation ::= `util.numeric.optional_narrow` $operand `:` type($operand) `as` $semantic_type attr-dict\n</code></pre> <p>Serves as a placeholder for points in the computation where an optional numeric narrowing can be performed without loss of information. Such ops can guide optimization passes wishing to perform precision reduction.</p> <p>In addition to the operand and result type, this op takes an additional <code>semantic_type</code> attribute representing the semantic target type, which can be one of: FloatType, Signed IntegerType, or Unsigned IntegerType.</p> <p>Note that this <code>semantic_type</code> must be a sign-carrying integer if using an integer type and cannot be IndexType (i.e. it can be used to indicate a possible narrowing of an IndexType to a specific integer).</p> <p>If the operand is a TensorType, then the result must be a TensorType. 
The <code>semantic_type</code> constrains the element type.</p> <p>Optionally, the minimum and maximum integer values (for integer semantic types) are tracked if known.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>semantic_type</code>::mlir::TypeAttrany type attribute <code>min_value</code>::mlir::IntegerAttrarbitrary integer attribute <code>max_value</code>::mlir::IntegerAttrarbitrary integer attribute"},{"location":"reference/mlir-dialects/Util/#operands_14","title":"Operands:","text":"Operand Description <code>operand</code> signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#results_13","title":"Results:","text":"Result Description <code>result</code> signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/Util/#utilglobaladdress-utilglobaladdressop","title":"<code>util.global.address</code> (Util::GlobalAddressOp)","text":"<p>Returns an address reference to a global</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.address` (`immutable` $is_immutable^)?\n              $global attr-dict `:` qualified(type($result))\n</code></pre> <p>Returns the address of a global as a typed reference. 
Can be used with the global load and store indirect ops.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalAddressOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/Util/#results_14","title":"Results:","text":"Result Description <code>result</code> a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilgloballoadindirect-utilgloballoadindirectop","title":"<code>util.global.load.indirect</code> (Util::GlobalLoadIndirectOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.load.indirect` (`immutable` $is_immutable^)?\n              $global attr-dict `:` qualified(type($global)) `-&gt;` type($result)\n</code></pre> <p>Returns a copy of the global variable value.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/Util/#operands_15","title":"Operands:","text":"Operand Description <code>global</code> a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#results_15","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utilglobalload-utilgloballoadop","title":"<code>util.global.load</code> (Util::GlobalLoadOp)","text":"<p>Loads a value from a global variable</p> <p>Syntax:</p> 
<pre><code>operation ::= `util.global.load` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($result)\n</code></pre> <p>Returns a global variable value.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/Util/#results_16","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utilglobal-utilglobalop","title":"<code>util.global</code> (Util::GlobalOp)","text":"<p>Stateful global variable declaration</p> <p>Syntax:</p> <pre><code>operation ::= `util.global` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Declares a global variable that maintains its value across invocations. 
The value is tied to the execution context of the module and different contexts will have different variable storage.</p> <p>Interfaces: <code>Symbol</code>, <code>Util_GlobalOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>::mlir::TypedAttrTypedAttr instance <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance"},{"location":"reference/mlir-dialects/Util/#utilglobalstoreindirect-utilglobalstoreindirectop","title":"<code>util.global.store.indirect</code> (Util::GlobalStoreIndirectOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.store.indirect` $value `,` $global attr-dict `:` type($value) `-&gt;` qualified(type($global))\n</code></pre> <p>Stores a copy of the value into a global variable.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_16","title":"Operands:","text":"Operand Description <code>value</code> any type <code>global</code> a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilglobalstore-utilglobalstoreop","title":"<code>util.global.store</code> (Util::GlobalStoreOp)","text":"<p>Stores a value into a global variable</p> <p>Syntax:</p> <pre><code>operation ::= `util.global.store` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a copy of the value into a global variable.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, 
<code>Util_GlobalStoreOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#operands_17","title":"Operands:","text":"Operand Description <code>value</code> any type"},{"location":"reference/mlir-dialects/Util/#list-ops","title":"List ops","text":"<p>Ops for <code>!util.list&lt;T&gt;</code> (mostly just a placeholder for now).</p>"},{"location":"reference/mlir-dialects/Util/#utillistcreate-utillistcreateop","title":"<code>util.list.create</code> (Util::ListCreateOp)","text":"<p>Creates a new empty list</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.create` ($initial_capacity^)? attr-dict `:` qualified(type($result))\n</code></pre> <p>Creates a new empty list with an optional initial capacity.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code>, <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_18","title":"Operands:","text":"Operand Description <code>initial_capacity</code> index"},{"location":"reference/mlir-dialects/Util/#results_17","title":"Results:","text":"Result Description <code>result</code> dense list container type"},{"location":"reference/mlir-dialects/Util/#utillistget-utillistgetop","title":"<code>util.list.get</code> (Util::ListGetOp)","text":"<p>Element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.get` $list `[` $index `]` attr-dict `:` custom&lt;ListTypeGet&gt;(type($list), type($result))\n</code></pre> <p>Returns the value of the element at the given index. 
Note that the value may be null if the element is null or the type does not match.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_19","title":"Operands:","text":"Operand Description <code>list</code> dense list container type <code>index</code> index"},{"location":"reference/mlir-dialects/Util/#results_18","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utillistresize-utillistresizeop","title":"<code>util.list.resize</code> (Util::ListResizeOp)","text":"<p>Resizes the list to a new count in elements</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.resize` operands attr-dict `:` qualified(type($list))\n</code></pre> <p>Resizes the list to contain <code>new_size</code> elements. This will either truncate the list if the existing size is greater than <code>new_size</code> or extend the list with the default list value of the element type.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_20","title":"Operands:","text":"Operand Description <code>list</code> dense list container type <code>new_size</code> index"},{"location":"reference/mlir-dialects/Util/#utillistset-utillistsetop","title":"<code>util.list.set</code> (Util::ListSetOp)","text":"<p>Element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.set` $list `[` $index `]` `,` $value attr-dict `:` custom&lt;ListTypeSet&gt;(type($list), type($value))\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_21","title":"Operands:","text":"Operand Description <code>list</code> dense list container type <code>index</code> index <code>value</code> any type"},{"location":"reference/mlir-dialects/Util/#utillistsize-utillistsizeop","title":"<code>util.list.size</code> (Util::ListSizeOp)","text":"<p>The size of the list in elements</p> <p>Syntax:</p> <pre><code>operation ::= `util.list.size` operands attr-dict `:` qualified(type($list))\n</code></pre> <p>Returns the current size of the list in elements.</p> <p>Interfaces: <code>InferTypeOpInterface</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_22","title":"Operands:","text":"Operand Description <code>list</code> dense list container type"},{"location":"reference/mlir-dialects/Util/#results_19","title":"Results:","text":"Result Description <code>result</code> index"},{"location":"reference/mlir-dialects/Util/#range-arithmetic-ops","title":"Range arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilrangeextents-utilrangeextentsop","title":"<code>util.range.extents</code> (Util::RangeExtentsOp)","text":"<p>Returns the min/max of a union of a set of ranges</p> <p>Syntax:</p> <pre><code>operation ::= `util.range.extents` custom&lt;RangeList&gt;($offsets, $lengths) attr-dict `:` type($min)\n</code></pre> <p>Computes min(offsets) and max(offsets + lengths). 
Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code>, <code>SameVariadicOperandSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_23","title":"Operands:","text":"Operand Description <code>offsets</code> variadic of index or integer <code>lengths</code> variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_20","title":"Results:","text":"Result Description <code>min</code> index or integer <code>max</code> index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemax-utilrangemaxop","title":"<code>util.range.max</code> (Util::RangeMaxOp)","text":"<p>Returns the max of all values</p> <p>Syntax:</p> <pre><code>operation ::= `util.range.max` $operands attr-dict `:` type($result)\n</code></pre> <p>Computes the max of a variadic list of operands. 
Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code>, <code>SameVariadicOperandSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_24","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_21","title":"Results:","text":"Result Description <code>result</code> index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemin-utilrangeminop","title":"<code>util.range.min</code> (Util::RangeMinOp)","text":"<p>Returns the min of all values</p> <p>Syntax:</p> <pre><code>operation ::= `util.range.min` $operands attr-dict `:` type($result)\n</code></pre> <p>Computes the min of a variadic list of operands. 
Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameOperandsAndResultType</code>, <code>SameVariadicOperandSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_25","title":"Operands:","text":"Operand Description <code>operands</code> variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_22","title":"Results:","text":"Result Description <code>result</code> index or integer"},{"location":"reference/mlir-dialects/Util/#status-ops","title":"Status ops","text":""},{"location":"reference/mlir-dialects/Util/#utilstatuscheck_ok-utilstatuscheckokop","title":"<code>util.status.check_ok</code> (Util::StatusCheckOkOp)","text":"<p>Raises a global failure if a status is not 'ok'</p> <p>Syntax:</p> <pre><code>operation ::= `util.status.check_ok` $status (`,` $message^)? attr-dict\n</code></pre> <p>When the status is not 'ok' this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. 
It's also possible that other work that is in-flight or pending when the failure occurs will complete.</p>"},{"location":"reference/mlir-dialects/Util/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#operands_26","title":"Operands:","text":"Operand Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/Util/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/Util/#utilcall-utilcallop","title":"<code>util.call</code> (Util::CallOp)","text":"<p>Function call operation</p> <p>Syntax:</p> <pre><code>operation ::= `util.call` $callee `(` $operands `)`\n              attr-dict `:`\n              custom&lt;OperandTypeList&gt;(type($operands))\n              `-&gt;`\n              custom&lt;TiedFunctionResultList&gt;(ref($operands),\n              ref(type($operands)),\n              type($results),\n              $tied_operands)\n</code></pre> <p>Represents a direct call to a function that is within the same symbol scope as the call. The operands and result types of the call must match the specified function type.</p> <p>Calls support tied operands which indicate that specific results alias a specific operand. 
The operand and result types are allowed to differ if a cast is performed within the callee.</p> <p>Example: <pre><code>util.func @fn(%arg0: i32, %arg1: tensor&lt;f32&gt;) -&gt; (f32, %arg1 as tensor&lt;i32&gt;)\n...\n%0 = util.call @fn(%0, %1) : (i32, tensor&lt;f32&gt;) -&gt; (f32, %1 as tensor&lt;i32&gt;)\n</code></pre></p> <p>Interfaces: <code>CallOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_TiedOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Util/#operands_27","title":"Operands:","text":"Operand Description <code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#results_23","title":"Results:","text":"Result Description <code>results</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#utilfunc-utilfuncop","title":"<code>util.func</code> (Util::FuncOp)","text":"<p>Function operation containing a CFG region</p> <p>An operation declaring a callable function.</p> <p>An external function declaration (used when referring to a function declared in some other module) has no body.</p> <p>Traits: <code>AffineScope</code>, <code>AutomaticAllocationScope</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>OpAsmOpInterface</code>, <code>Symbol</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>tied_operands</code>::mlir::ArrayAttr64-bit integer array attribute <code>sym_visibility</code>::mlir::StringAttrstring attribute 
<code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance"},{"location":"reference/mlir-dialects/Util/#utilinitializer-utilinitializerop","title":"<code>util.initializer</code> (Util::InitializerOp)","text":"<p>Global initialization function</p> <p>A function that is called in definition order upon module initialization. Must not load any globals that are defined or initialized after it in the module.</p> <p>Traits: <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>Util_InitializerOpInterface</code></p>"},{"location":"reference/mlir-dialects/Util/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Util/#utilreturn-utilreturnop","title":"<code>util.return</code> (Util::ReturnOp)","text":"<p>Return from a util.func or util.initializer</p> <p>Syntax:</p> <pre><code>operation ::= `util.return` attr-dict\n              ($operands^ `:` type($operands))?\n</code></pre> <p>Returns control from a function or an initializer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>HasParent&lt;IREE::Util::InitializerOp, IREE::Util::FuncOp&gt;</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_28","title":"Operands:","text":"Operand Description 
<code>operands</code> variadic of any type"},{"location":"reference/mlir-dialects/Util/#type-manipulation-ops","title":"Type manipulation ops","text":""},{"location":"reference/mlir-dialects/Util/#utilcast-utilcastop","title":"<code>util.cast</code> (Util::CastOp)","text":"<p>Casts one util type to another, similar to static_cast/dynamic_cast</p> <p>Syntax:</p> <pre><code>operation ::= `util.cast` $operand attr-dict `:` type($operand) `to` type($result)\n</code></pre> <p>Performs a type cast between object types known to the util dialect.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>CastOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>TiedOpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_29","title":"Operands:","text":"Operand Description <code>operand</code> any type"},{"location":"reference/mlir-dialects/Util/#results_24","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#utilcmpeq-utilcmpeqop","title":"<code>util.cmp.eq</code> (Util::CmpEQOp)","text":"<p>Compares two values for equality</p> <p>Syntax:</p> <pre><code>operation ::= `util.cmp.eq` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands for equality. 
This is intended for comparing IREE reference types (like !util.buffer) that cannot be used with std.cmpi.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_30","title":"Operands:","text":"Operand Description <code>lhs</code> any type <code>rhs</code> any type"},{"location":"reference/mlir-dialects/Util/#results_25","title":"Results:","text":"Result Description <code>result</code> 1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilnull-utilnullop","title":"<code>util.null</code> (Util::NullOp)","text":"<p>Returns a null type value</p> <p>Syntax:</p> <pre><code>operation ::= `util.null` attr-dict `:` type($result)\n</code></pre> <p>Defines an SSA value that is lowered into dialects supporting null/undefined/optional/etc values.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#results_26","title":"Results:","text":"Result Description <code>result</code> any type"},{"location":"reference/mlir-dialects/Util/#value-utility-ops","title":"Value utility ops","text":""},{"location":"reference/mlir-dialects/Util/#utilswitch-utilswitchop","title":"<code>util.switch</code> (Util::SwitchOp)","text":"<p>Primitive switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `util.switch` type($default_value) `from`\n              custom&lt;TypedValueList&gt;(ref(type($default_value)), $values, type($values))\n              `at` $index\n              `else` $default_value\n              attr-dict\n              `:` type($result)\n</code></pre> <p>Returns 
the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = util.switch %index[%c100, %c200, %c300] else %c5 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>InferTypeOpInterface</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/Util/#operands_31","title":"Operands:","text":"Operand Description <code>index</code> index <code>default_value</code> index or integer or floating-point <code>values</code> variadic of index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#results_27","title":"Results:","text":"Result Description <code>result</code> index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#attributes_16","title":"Attributes","text":""},{"location":"reference/mlir-dialects/Util/#bytepatternattr","title":"BytePatternAttr","text":"<p>an attribute containing a filled byte pattern</p> <p>Syntax:</p> <pre><code>#util.byte_pattern&lt;\n  ::mlir::Type,   # type\n  int64_t   # pattern\n&gt;\n</code></pre> <p>A dense serializable attribute with the given byte pattern.</p>"},{"location":"reference/mlir-dialects/Util/#parameters","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code> pattern <code>int64_t</code>"},{"location":"reference/mlir-dialects/Util/#byterangeattr","title":"ByteRangeAttr","text":"<p>defines a range of bytes</p> <p>Specifies a starting offset and total length in bytes.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description offset <code>int64_t</code> length 
<code>int64_t</code>"},{"location":"reference/mlir-dialects/Util/#compositeattr","title":"CompositeAttr","text":"<p>an attribute composed of a sequence of attributes</p> <p>Models a concatenated set of serializable attributes that when combined form a single sequence of i8 elements. As each value references the uniqued storage of the composite element this attribute is cheap to construct. When the full flattened range is required it can be efficiently streamed via the SerializableAttrInterface. All values must also be serializable.</p> <p>All values are tightly packed to byte boundaries. If padding is required it can be inserted as splat elements attributes with the padding value (usually 0). Sub-byte aligned element types will have their individual components padded to byte alignment.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description totalLength <code>int64_t</code> values <code>ArrayAttr</code>"},{"location":"reference/mlir-dialects/Util/#inlinealwaysattr","title":"InlineAlwaysAttr","text":"<p>forces inlining on the associated function when possible</p> <p>Syntax: <code>#util.inline.always</code></p> <p>Skips any cost-model decisions as to whether a function should be inlined into call-sites and allows the inlining to happen. 
Any policies that prevent inlining will still be observed and inlining may fail if any are not satisfied.</p>"},{"location":"reference/mlir-dialects/Util/#inlineneverattr","title":"InlineNeverAttr","text":"<p>disables inlining on the associated function</p> <p>Syntax: <code>#util.inline.never</code></p> <p>Disables inlining of the function the attribute is associated with into any call-site.</p>"},{"location":"reference/mlir-dialects/Util/#uninitializedattr","title":"UninitializedAttr","text":"<p>an attribute specifying uninitialized storage</p> <p>Syntax:</p> <pre><code>#util.uninitialized&lt;\n  ::mlir::Type   # type\n&gt;\n</code></pre> <p>The contents of the storage backing this attribute may be uninitialized at runtime. This is a hint to implementations that if policy allows memory allocated for the storage of this attribute type is allowed to have undefined contents upon return.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description type <code>::mlir::Type</code>"},{"location":"reference/mlir-dialects/Util/#types","title":"Types","text":""},{"location":"reference/mlir-dialects/Util/#buffertype","title":"BufferType","text":"<p>a reference counted byte buffer</p> <p>Syntax: <code>!util.buffer</code></p> <p>A reference counted byte buffer that models a pointer, offset, and length.</p>"},{"location":"reference/mlir-dialects/Util/#listtype","title":"ListType","text":"<p>dense list container type</p> <p>Syntax:</p> <pre><code>!util.list&lt;\n  Type   # element_type\n&gt;\n</code></pre> <p>Typed container supporting variant storage.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description element_type <code>Type</code>"},{"location":"reference/mlir-dialects/Util/#objecttype","title":"ObjectType","text":"<p>a placeholder for an unspecified object type</p> <p>Syntax: <code>!util.object</code></p> <p>Describes a runtime object type. 
These may be reference counted or garbage collected at runtime.</p>"},{"location":"reference/mlir-dialects/Util/#ptrtype","title":"PtrType","text":"<p>a pointer-like reference</p> <p>Syntax:</p> <pre><code>!util.ptr&lt;\n  Type   # target_type\n&gt;\n</code></pre> <p>A typed indirect reference to a value. These define a runtime addressable value that is strongly referenced.</p>"},{"location":"reference/mlir-dialects/Util/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description target_type <code>Type</code>"},{"location":"reference/mlir-dialects/Util/#varianttype","title":"VariantType","text":"<p>a placeholder for a variant type (<code>?</code>)</p> <p>Syntax: <code>!util.variant</code></p> <p>Describes a runtime variant type. These may be primitives (i32, f32, etc) or object types.</p>"},{"location":"reference/mlir-dialects/VM/","title":"VM","text":""},{"location":"reference/mlir-dialects/VM/#vm-dialect","title":"'vm' Dialect","text":"<p>A dialect representing operations against an abstract virtual machine.</p> <p>The virtual machine ops are designed to be either serialized to a bytecode representation that can be interpreted at runtime or lowered further to static representations such as LLVM IR, C, etc. The idea is that the types and operations performed are generally just encoding resource ownership rules and control flow that can be represented in many different ways by target runtimes. For example, it should be possible to lower the VM dialect to SPIR-V and run the VM entirely within a persistent Vulkan kernel.</p> <p>With this scalable runtime approach we make some limiting assumptions to keep the required implementations simple. As we assume all real math is happening within dispatch regions the only math we provide is scalar operations used for offset and shape calculations. 
This also enables simple flow control such as fixed-range loops.</p> <p>Besides integer values the only other storage type is a variant reference modeling an abstract iree_vm_ref_t. This allows automated reference counting to be relied upon by other dialects built on top of the VM dialect and avoids the need for more verbose manual reference counting logic (that may be difficult or impossible to manage given the coroutine-like nature of the VM). Lowering targets can insert the reference counting as needed.</p> <p>The types in the VM dialect correspond to the storage rather than value type, with the interpretation of the type encoded on the op.</p> <ul> <li>'vm' Dialect<ul> <li>Operations<ul> <li>Async/fiber ops<ul> <li>vm.yield (VM::YieldOp)</li> </ul> </li> <li>Bitwise shift and rotate ops<ul> <li>vm.shl.i32 (VM::ShlI32Op)</li> <li>vm.shl.i64 (VM::ShlI64Op)</li> <li>vm.shr.i32.s (VM::ShrI32SOp)</li> <li>vm.shr.i32.u (VM::ShrI32UOp)</li> <li>vm.shr.i64.s (VM::ShrI64SOp)</li> <li>vm.shr.i64.u (VM::ShrI64UOp)</li> </ul> </li> <li>Buffer ops<ul> <li>vm.buffer.alloc (VM::BufferAllocOp)</li> <li>vm.buffer.clone (VM::BufferCloneOp)</li> <li>vm.buffer.compare (VM::BufferCompareOp)</li> <li>vm.buffer.copy (VM::BufferCopyOp)</li> <li>vm.buffer.fill.f32 (VM::BufferFillF32Op)</li> <li>vm.buffer.fill.f64 (VM::BufferFillF64Op)</li> <li>vm.buffer.fill.i16 (VM::BufferFillI16Op)</li> <li>vm.buffer.fill.i32 (VM::BufferFillI32Op)</li> <li>vm.buffer.fill.i64 (VM::BufferFillI64Op)</li> <li>vm.buffer.fill.i8 (VM::BufferFillI8Op)</li> <li>vm.buffer.hash (VM::BufferHashOp)</li> <li>vm.buffer.length (VM::BufferLengthOp)</li> <li>vm.buffer.load.f32 (VM::BufferLoadF32Op)</li> <li>vm.buffer.load.f64 (VM::BufferLoadF64Op)</li> <li>vm.buffer.load.i16.s (VM::BufferLoadI16SOp)</li> <li>vm.buffer.load.i16.u (VM::BufferLoadI16UOp)</li> <li>vm.buffer.load.i32 (VM::BufferLoadI32Op)</li> <li>vm.buffer.load.i64 (VM::BufferLoadI64Op)</li> <li>vm.buffer.load.i8.s (VM::BufferLoadI8SOp)</li> 
<li>vm.buffer.load.i8.u (VM::BufferLoadI8UOp)</li> <li>vm.buffer.store.f32 (VM::BufferStoreF32Op)</li> <li>vm.buffer.store.f64 (VM::BufferStoreF64Op)</li> <li>vm.buffer.store.i16 (VM::BufferStoreI16Op)</li> <li>vm.buffer.store.i32 (VM::BufferStoreI32Op)</li> <li>vm.buffer.store.i64 (VM::BufferStoreI64Op)</li> <li>vm.buffer.store.i8 (VM::BufferStoreI8Op)</li> </ul> </li> <li>Casting and conversion ops<ul> <li>vm.bitcast.f32.i32 (VM::BitcastF32I32Op)</li> <li>vm.bitcast.f64.i64 (VM::BitcastF64I64Op)</li> <li>vm.bitcast.i32.f32 (VM::BitcastI32F32Op)</li> <li>vm.bitcast.i64.f64 (VM::BitcastI64F64Op)</li> <li>vm.cast.any.ref (VM::CastAnyRefOp)</li> <li>vm.cast.f32.si32 (VM::CastF32SI32Op)</li> <li>vm.cast.f32.ui32 (VM::CastF32UI32Op)</li> <li>vm.cast.ref.any (VM::CastRefAnyOp)</li> <li>vm.cast.si32.f32 (VM::CastSI32F32Op)</li> <li>vm.cast.ui32.f32 (VM::CastUI32F32Op)</li> <li>vm.ext.f32.f64 (VM::ExtF32F64Op)</li> <li>vm.ext.i16.i32.s (VM::ExtI16I32SOp)</li> <li>vm.ext.i16.i32.u (VM::ExtI16I32UOp)</li> <li>vm.ext.i16.i64.s (VM::ExtI16I64SOp)</li> <li>vm.ext.i16.i64.u (VM::ExtI16I64UOp)</li> <li>vm.ext.i32.i64.s (VM::ExtI32I64SOp)</li> <li>vm.ext.i32.i64.u (VM::ExtI32I64UOp)</li> <li>vm.ext.i8.i32.s (VM::ExtI8I32SOp)</li> <li>vm.ext.i8.i32.u (VM::ExtI8I32UOp)</li> <li>vm.ext.i8.i64.s (VM::ExtI8I64SOp)</li> <li>vm.ext.i8.i64.u (VM::ExtI8I64UOp)</li> <li>vm.trunc.f64.f32 (VM::TruncF64F32Op)</li> <li>vm.trunc.i16.i8 (VM::TruncI16I8Op)</li> <li>vm.trunc.i32.i16 (VM::TruncI32I16Op)</li> <li>vm.trunc.i32.i8 (VM::TruncI32I8Op)</li> <li>vm.trunc.i64.i16 (VM::TruncI64I16Op)</li> <li>vm.trunc.i64.i32 (VM::TruncI64I32Op)</li> <li>vm.trunc.i64.i8 (VM::TruncI64I8Op)</li> </ul> </li> <li>Comparison ops<ul> <li>vm.cmp.eq.i32 (VM::CmpEQI32Op)</li> <li>vm.cmp.eq.i64 (VM::CmpEQI64Op)</li> <li>vm.cmp.gte.i32.s (VM::CmpGTEI32SOp)</li> <li>vm.cmp.gte.i32.u (VM::CmpGTEI32UOp)</li> <li>vm.cmp.gte.i64.s (VM::CmpGTEI64SOp)</li> <li>vm.cmp.gte.i64.u (VM::CmpGTEI64UOp)</li> <li>vm.cmp.gt.i32.s 
(VM::CmpGTI32SOp)</li> <li>vm.cmp.gt.i32.u (VM::CmpGTI32UOp)</li> <li>vm.cmp.gt.i64.s (VM::CmpGTI64SOp)</li> <li>vm.cmp.gt.i64.u (VM::CmpGTI64UOp)</li> <li>vm.cmp.lte.i32.s (VM::CmpLTEI32SOp)</li> <li>vm.cmp.lte.i32.u (VM::CmpLTEI32UOp)</li> <li>vm.cmp.lte.i64.s (VM::CmpLTEI64SOp)</li> <li>vm.cmp.lte.i64.u (VM::CmpLTEI64UOp)</li> <li>vm.cmp.lt.i32.s (VM::CmpLTI32SOp)</li> <li>vm.cmp.lt.i32.u (VM::CmpLTI32UOp)</li> <li>vm.cmp.lt.i64.s (VM::CmpLTI64SOp)</li> <li>vm.cmp.lt.i64.u (VM::CmpLTI64UOp)</li> <li>vm.cmp.ne.i32 (VM::CmpNEI32Op)</li> <li>vm.cmp.ne.i64 (VM::CmpNEI64Op)</li> <li>vm.cmp.nz.i32 (VM::CmpNZI32Op)</li> <li>vm.cmp.nz.i64 (VM::CmpNZI64Op)</li> </ul> </li> <li>Conditional assignment ops<ul> <li>vm.select.f32 (VM::SelectF32Op)</li> <li>vm.select.f64 (VM::SelectF64Op)</li> <li>vm.select.i32 (VM::SelectI32Op)</li> <li>vm.select.i64 (VM::SelectI64Op)</li> <li>vm.select.ref (VM::SelectRefOp)</li> <li>vm.switch.f32 (VM::SwitchF32Op)</li> <li>vm.switch.f64 (VM::SwitchF64Op)</li> <li>vm.switch.i32 (VM::SwitchI32Op)</li> <li>vm.switch.i64 (VM::SwitchI64Op)</li> <li>vm.switch.ref (VM::SwitchRefOp)</li> </ul> </li> <li>Constant ops<ul> <li>vm.const.f32 (VM::ConstF32Op)</li> <li>vm.const.f32.zero (VM::ConstF32ZeroOp)</li> <li>vm.const.f64 (VM::ConstF64Op)</li> <li>vm.const.f64.zero (VM::ConstF64ZeroOp)</li> <li>vm.const.i32 (VM::ConstI32Op)</li> <li>vm.const.i32.zero (VM::ConstI32ZeroOp)</li> <li>vm.const.i64 (VM::ConstI64Op)</li> <li>vm.const.i64.zero (VM::ConstI64ZeroOp)</li> <li>vm.const.ref.rodata (VM::ConstRefRodataOp)</li> <li>vm.const.ref.zero (VM::ConstRefZeroOp)</li> <li>vm.rodata.inline (VM::RodataInlineOp)</li> <li>vm.rodata (VM::RodataOp)</li> <li>vm.rodata.table.inline (VM::RodataTableInlineOp)</li> </ul> </li> <li>Control flow ops<ul> <li>vm.br (VM::BranchOp)</li> <li>vm.br_table (VM::BranchTableOp)</li> <li>vm.call (VM::CallOp)</li> <li>vm.call.variadic (VM::CallVariadicOp)</li> <li>vm.check.eq (VM::CheckEQOp)</li> <li>vm.check.ne (VM::CheckNEOp)</li> 
<li>vm.check.nz (VM::CheckNZOp)</li> <li>vm.check.nearly_eq (VM::CheckNearlyEQOp)</li> <li>vm.cond_br (VM::CondBranchOp)</li> <li>vm.cond_fail (VM::CondFailOp)</li> <li>vm.fail (VM::FailOp)</li> <li>vm.import.resolved (VM::ImportResolvedOp)</li> <li>vm.return (VM::ReturnOp)</li> </ul> </li> <li>Debugging ops<ul> <li>vm.break (VM::BreakOp)</li> <li>vm.cond_break (VM::CondBreakOp)</li> <li>vm.print (VM::PrintOp)</li> <li>vm.trace (VM::TraceOp)</li> </ul> </li> <li>Floating-point arithmetic ops<ul> <li>vm.abs.f32 (VM::AbsF32Op)</li> <li>vm.abs.f64 (VM::AbsF64Op)</li> <li>vm.add.f32 (VM::AddF32Op)</li> <li>vm.add.f64 (VM::AddF64Op)</li> <li>vm.ceil.f32 (VM::CeilF32Op)</li> <li>vm.ceil.f64 (VM::CeilF64Op)</li> <li>vm.div.f32 (VM::DivF32Op)</li> <li>vm.div.f64 (VM::DivF64Op)</li> <li>vm.fma.f32 (VM::FMAF32Op)</li> <li>vm.fma.f64 (VM::FMAF64Op)</li> <li>vm.floor.f32 (VM::FloorF32Op)</li> <li>vm.floor.f64 (VM::FloorF64Op)</li> <li>vm.max.f32 (VM::MaxF32Op)</li> <li>vm.max.f64 (VM::MaxF64Op)</li> <li>vm.min.f32 (VM::MinF32Op)</li> <li>vm.min.f64 (VM::MinF64Op)</li> <li>vm.mul.f32 (VM::MulF32Op)</li> <li>vm.mul.f64 (VM::MulF64Op)</li> <li>vm.neg.f32 (VM::NegF32Op)</li> <li>vm.neg.f64 (VM::NegF64Op)</li> <li>vm.rem.f32 (VM::RemF32Op)</li> <li>vm.rem.f64 (VM::RemF64Op)</li> <li>vm.round.f32.even (VM::RoundF32EvenOp)</li> <li>vm.round.f32 (VM::RoundF32Op)</li> <li>vm.round.f64.even (VM::RoundF64EvenOp)</li> <li>vm.round.f64 (VM::RoundF64Op)</li> <li>vm.sub.f32 (VM::SubF32Op)</li> <li>vm.sub.f64 (VM::SubF64Op)</li> </ul> </li> <li>Floating-point comparison ops<ul> <li>vm.cmp.eq.f32.near (VM::CmpEQF32NearOp)</li> <li>vm.cmp.eq.f32.o (VM::CmpEQF32OOp)</li> <li>vm.cmp.eq.f32.u (VM::CmpEQF32UOp)</li> <li>vm.cmp.eq.f64.near (VM::CmpEQF64NearOp)</li> <li>vm.cmp.eq.f64.o (VM::CmpEQF64OOp)</li> <li>vm.cmp.eq.f64.u (VM::CmpEQF64UOp)</li> <li>vm.cmp.gte.f32.o (VM::CmpGTEF32OOp)</li> <li>vm.cmp.gte.f32.u (VM::CmpGTEF32UOp)</li> <li>vm.cmp.gte.f64.o (VM::CmpGTEF64OOp)</li> 
<li>vm.cmp.gte.f64.u (VM::CmpGTEF64UOp)</li> <li>vm.cmp.gt.f32.o (VM::CmpGTF32OOp)</li> <li>vm.cmp.gt.f32.u (VM::CmpGTF32UOp)</li> <li>vm.cmp.gt.f64.o (VM::CmpGTF64OOp)</li> <li>vm.cmp.gt.f64.u (VM::CmpGTF64UOp)</li> <li>vm.cmp.lte.f32.o (VM::CmpLTEF32OOp)</li> <li>vm.cmp.lte.f32.u (VM::CmpLTEF32UOp)</li> <li>vm.cmp.lte.f64.o (VM::CmpLTEF64OOp)</li> <li>vm.cmp.lte.f64.u (VM::CmpLTEF64UOp)</li> <li>vm.cmp.lt.f32.o (VM::CmpLTF32OOp)</li> <li>vm.cmp.lt.f32.u (VM::CmpLTF32UOp)</li> <li>vm.cmp.lt.f64.o (VM::CmpLTF64OOp)</li> <li>vm.cmp.lt.f64.u (VM::CmpLTF64UOp)</li> <li>vm.cmp.ne.f32.o (VM::CmpNEF32OOp)</li> <li>vm.cmp.ne.f32.u (VM::CmpNEF32UOp)</li> <li>vm.cmp.ne.f64.o (VM::CmpNEF64OOp)</li> <li>vm.cmp.ne.f64.u (VM::CmpNEF64UOp)</li> <li>vm.cmp.nz.f32.o (VM::CmpNZF32OOp)</li> <li>vm.cmp.nz.f32.u (VM::CmpNZF32UOp)</li> <li>vm.cmp.nz.f64.o (VM::CmpNZF64OOp)</li> <li>vm.cmp.nz.f64.u (VM::CmpNZF64UOp)</li> <li>vm.cmp.nan.f32 (VM::CmpNaNF32Op)</li> <li>vm.cmp.nan.f64 (VM::CmpNaNF64Op)</li> </ul> </li> <li>Floating-point math ops<ul> <li>vm.atan2.f32 (VM::Atan2F32Op)</li> <li>vm.atan2.f64 (VM::Atan2F64Op)</li> <li>vm.atan.f32 (VM::AtanF32Op)</li> <li>vm.atan.f64 (VM::AtanF64Op)</li> <li>vm.cos.f32 (VM::CosF32Op)</li> <li>vm.cos.f64 (VM::CosF64Op)</li> <li>vm.erf.f32 (VM::ErfF32Op)</li> <li>vm.erf.f64 (VM::ErfF64Op)</li> <li>vm.exp2.f32 (VM::Exp2F32Op)</li> <li>vm.exp2.f64 (VM::Exp2F64Op)</li> <li>vm.exp.f32 (VM::ExpF32Op)</li> <li>vm.exp.f64 (VM::ExpF64Op)</li> <li>vm.expm1.f32 (VM::ExpM1F32Op)</li> <li>vm.expm1.f64 (VM::ExpM1F64Op)</li> <li>vm.log10.f32 (VM::Log10F32Op)</li> <li>vm.log10.f64 (VM::Log10F64Op)</li> <li>vm.log1p.f32 (VM::Log1pF32Op)</li> <li>vm.log1p.f64 (VM::Log1pF64Op)</li> <li>vm.log2.f32 (VM::Log2F32Op)</li> <li>vm.log2.f64 (VM::Log2F64Op)</li> <li>vm.log.f32 (VM::LogF32Op)</li> <li>vm.log.f64 (VM::LogF64Op)</li> <li>vm.pow.f32 (VM::PowF32Op)</li> <li>vm.pow.f64 (VM::PowF64Op)</li> <li>vm.rsqrt.f32 (VM::RsqrtF32Op)</li> <li>vm.rsqrt.f64 
(VM::RsqrtF64Op)</li> <li>vm.sin.f32 (VM::SinF32Op)</li> <li>vm.sin.f64 (VM::SinF64Op)</li> <li>vm.sqrt.f32 (VM::SqrtF32Op)</li> <li>vm.sqrt.f64 (VM::SqrtF64Op)</li> <li>vm.tanh.f32 (VM::TanhF32Op)</li> <li>vm.tanh.f64 (VM::TanhF64Op)</li> </ul> </li> <li>Global ops<ul> <li>vm.global.address (VM::GlobalAddressOp)</li> <li>vm.global.f32 (VM::GlobalF32Op)</li> <li>vm.global.f64 (VM::GlobalF64Op)</li> <li>vm.global.i32 (VM::GlobalI32Op)</li> <li>vm.global.i64 (VM::GlobalI64Op)</li> <li>vm.global.load.f32 (VM::GlobalLoadF32Op)</li> <li>vm.global.load.f64 (VM::GlobalLoadF64Op)</li> <li>vm.global.load.i32 (VM::GlobalLoadI32Op)</li> <li>vm.global.load.i64 (VM::GlobalLoadI64Op)</li> <li>vm.global.load.indirect.f32 (VM::GlobalLoadIndirectF32Op)</li> <li>vm.global.load.indirect.f64 (VM::GlobalLoadIndirectF64Op)</li> <li>vm.global.load.indirect.i32 (VM::GlobalLoadIndirectI32Op)</li> <li>vm.global.load.indirect.i64 (VM::GlobalLoadIndirectI64Op)</li> <li>vm.global.load.indirect.ref (VM::GlobalLoadIndirectRefOp)</li> <li>vm.global.load.ref (VM::GlobalLoadRefOp)</li> <li>vm.global.ref (VM::GlobalRefOp)</li> <li>vm.global.store.f32 (VM::GlobalStoreF32Op)</li> <li>vm.global.store.f64 (VM::GlobalStoreF64Op)</li> <li>vm.global.store.i32 (VM::GlobalStoreI32Op)</li> <li>vm.global.store.i64 (VM::GlobalStoreI64Op)</li> <li>vm.global.store.indirect.f32 (VM::GlobalStoreIndirectF32Op)</li> <li>vm.global.store.indirect.f64 (VM::GlobalStoreIndirectF64Op)</li> <li>vm.global.store.indirect.i32 (VM::GlobalStoreIndirectI32Op)</li> <li>vm.global.store.indirect.i64 (VM::GlobalStoreIndirectI64Op)</li> <li>vm.global.store.indirect.ref (VM::GlobalStoreIndirectRefOp)</li> <li>vm.global.store.ref (VM::GlobalStoreRefOp)</li> </ul> </li> <li>Integer arithmetic ops<ul> <li>vm.abs.i32 (VM::AbsI32Op)</li> <li>vm.abs.i64 (VM::AbsI64Op)</li> <li>vm.add.i32 (VM::AddI32Op)</li> <li>vm.add.i64 (VM::AddI64Op)</li> <li>vm.div.i32.s (VM::DivI32SOp)</li> <li>vm.div.i32.u (VM::DivI32UOp)</li> <li>vm.div.i64.s 
(VM::DivI64SOp)</li> <li>vm.div.i64.u (VM::DivI64UOp)</li> <li>vm.fma.i32 (VM::FMAI32Op)</li> <li>vm.fma.i64 (VM::FMAI64Op)</li> <li>vm.max.i32.s (VM::MaxI32SOp)</li> <li>vm.max.i32.u (VM::MaxI32UOp)</li> <li>vm.max.i64.s (VM::MaxI64SOp)</li> <li>vm.max.i64.u (VM::MaxI64UOp)</li> <li>vm.min.i32.s (VM::MinI32SOp)</li> <li>vm.min.i32.u (VM::MinI32UOp)</li> <li>vm.min.i64.s (VM::MinI64SOp)</li> <li>vm.min.i64.u (VM::MinI64UOp)</li> <li>vm.mul.i32 (VM::MulI32Op)</li> <li>vm.mul.i64 (VM::MulI64Op)</li> <li>vm.rem.i32.s (VM::RemI32SOp)</li> <li>vm.rem.i32.u (VM::RemI32UOp)</li> <li>vm.rem.i64.s (VM::RemI64SOp)</li> <li>vm.rem.i64.u (VM::RemI64UOp)</li> <li>vm.sub.i32 (VM::SubI32Op)</li> <li>vm.sub.i64 (VM::SubI64Op)</li> </ul> </li> <li>Integer bit manipulation ops<ul> <li>vm.and.i32 (VM::AndI32Op)</li> <li>vm.and.i64 (VM::AndI64Op)</li> <li>vm.ctlz.i32 (VM::CtlzI32Op)</li> <li>vm.ctlz.i64 (VM::CtlzI64Op)</li> <li>vm.not.i32 (VM::NotI32Op)</li> <li>vm.not.i64 (VM::NotI64Op)</li> <li>vm.or.i32 (VM::OrI32Op)</li> <li>vm.or.i64 (VM::OrI64Op)</li> <li>vm.xor.i32 (VM::XorI32Op)</li> <li>vm.xor.i64 (VM::XorI64Op)</li> </ul> </li> <li>List ops<ul> <li>vm.list.alloc (VM::ListAllocOp)</li> <li>vm.list.get.f32 (VM::ListGetF32Op)</li> <li>vm.list.get.f64 (VM::ListGetF64Op)</li> <li>vm.list.get.i32 (VM::ListGetI32Op)</li> <li>vm.list.get.i64 (VM::ListGetI64Op)</li> <li>vm.list.get.ref (VM::ListGetRefOp)</li> <li>vm.list.reserve (VM::ListReserveOp)</li> <li>vm.list.resize (VM::ListResizeOp)</li> <li>vm.list.set.f32 (VM::ListSetF32Op)</li> <li>vm.list.set.f64 (VM::ListSetF64Op)</li> <li>vm.list.set.i32 (VM::ListSetI32Op)</li> <li>vm.list.set.i64 (VM::ListSetI64Op)</li> <li>vm.list.set.ref (VM::ListSetRefOp)</li> <li>vm.list.size (VM::ListSizeOp)</li> </ul> </li> <li>Ref comparison ops<ul> <li>vm.cmp.eq.ref (VM::CmpEQRefOp)</li> <li>vm.cmp.ne.ref (VM::CmpNERefOp)</li> <li>vm.cmp.nz.ref (VM::CmpNZRefOp)</li> </ul> </li> <li>Structural ops<ul> <li>vm.export (VM::ExportOp)</li> 
<li>vm.func (VM::FuncOp)</li> <li>vm.import (VM::ImportOp)</li> <li>vm.initializer (VM::InitializerOp)</li> <li>vm.module (VM::ModuleOp)</li> <li>vm.module_terminator (VM::ModuleTerminatorOp)</li> </ul> </li> </ul> </li> <li>Attributes<ul> <li>OrdinalCountsAttr</li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/VM/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/VM/#asyncfiber-ops","title":"Async/fiber ops","text":""},{"location":"reference/mlir-dialects/VM/#vmyield-vmyieldop","title":"<code>vm.yield</code> (VM::YieldOp)","text":"<p>Unconditional fiber yield operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.yield` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n</code></pre> <p>Yields the fiber for some (likely short) amount of time. This can be used to  perform cooperative scheduling and ensure fair (enough) execution. Execution  resumes at the specified target branch.</p> <p><code>^bb0:    vm.yield ^on_resume  ^on_resume:    ...</code></p> <p>Traits: <code>HasParent&lt;IREE::VM::FuncOp&gt;</code>, <code>Terminator</code>, <code>Util_YieldPoint</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands","title":"Operands:","text":"Operand Description <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#bitwise-shift-and-rotate-ops","title":"Bitwise shift and rotate ops","text":""},{"location":"reference/mlir-dialects/VM/#vmshli32-vmshli32op","title":"<code>vm.shl.i32</code> (VM::ShlI32Op)","text":"<p>Integer shift left operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shl.i32` 
$operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_1","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshli64-vmshli64op","title":"<code>vm.shl.i64</code> (VM::ShlI64Op)","text":"<p>Integer shift left operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shl.i64` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_2","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_1","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32s-vmshri32sop","title":"<code>vm.shr.i32.s</code> (VM::ShrI32SOp)","text":"<p>Signed integer (arithmetic) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i32.s` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_3","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_2","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32u-vmshri32uop","title":"<code>vm.shr.i32.u</code> (VM::ShrI32UOp)","text":"<p>Unsigned integer (logical) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i32.u` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_4","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_3","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64s-vmshri64sop","title":"<code>vm.shr.i64.s</code> (VM::ShrI64SOp)","text":"<p>Signed integer (arithmetic) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i64.s` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_5","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_4","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64u-vmshri64uop","title":"<code>vm.shr.i64.u</code> (VM::ShrI64UOp)","text":"<p>Unsigned integer (logical) shift right operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.shr.i64.u` $operand `,` $amount attr-dict `:` type($operand)\n</code></pre> <p>Shifts the operand in a direction by the number of bits specified.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_6","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer <code>amount</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_5","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbufferalloc-vmbufferallocop","title":"<code>vm.buffer.alloc</code> (VM::BufferAllocOp)","text":"<p>Allocates a new zero-initialized buffer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.alloc` operands attr-dict `:` type($result)\n</code></pre> <p>Allocates a new zero-initialized buffer with the given size in bytes.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_7","title":"Operands:","text":"Operand Description <code>length</code> 64-bit signless integer <code>alignment</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_6","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmbufferclone-vmbuffercloneop","title":"<code>vm.buffer.clone</code> (VM::BufferCloneOp)","text":"<p>Clones a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.clone` operands attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Clones a range of the source buffer to produce a mutable buffer with the same contents.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_8","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>alignment</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_7","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmbuffercompare-vmbuffercompareop","title":"<code>vm.buffer.compare</code> (VM::BufferCompareOp)","text":"<p>Compares a range of a buffer to another</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.compare` operands attr-dict `:` type($lhs_buffer) `,` type($rhs_buffer)\n</code></pre> <p>Returns 1 if the two ranges 
are bitwise equivalent, somewhat like memcmp.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_9","title":"Operands:","text":"Operand Description <code>lhs_buffer</code> ref <code>lhs_offset</code> 64-bit signless integer <code>rhs_buffer</code> ref <code>rhs_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_8","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbuffercopy-vmbuffercopyop","title":"<code>vm.buffer.copy</code> (VM::BufferCopyOp)","text":"<p>Copies a range of a buffer to another</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.copy` operands attr-dict `:` type($source_buffer) `-&gt;` type($target_buffer)\n</code></pre> <p>Copies a range of one buffer to another, like memcpy.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_10","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf32-vmbufferfillf32op","title":"<code>vm.buffer.fill.f32</code> (VM::BufferFillF32Op)","text":"<p>Fills the buffer with 
the given repeating 32-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.f32` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_11","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf64-vmbufferfillf64op","title":"<code>vm.buffer.fill.f64</code> (VM::BufferFillF64Op)","text":"<p>Fills the buffer with the given repeating 64-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.f64` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_12","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 64-bit float or 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli16-vmbufferfilli16op","title":"<code>vm.buffer.fill.i16</code> (VM::BufferFillI16Op)","text":"<p>Fills the buffer with the given repeating 16-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i16` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_13","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli32-vmbufferfilli32op","title":"<code>vm.buffer.fill.i32</code> (VM::BufferFillI32Op)","text":"<p>Fills the buffer with the given repeating 32-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i32` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_14","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer 
<code>length</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli64-vmbufferfilli64op","title":"<code>vm.buffer.fill.i64</code> (VM::BufferFillI64Op)","text":"<p>Fills the buffer with the given repeating 64-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i64` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_15","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli8-vmbufferfilli8op","title":"<code>vm.buffer.fill.i8</code> (VM::BufferFillI8Op)","text":"<p>Fills the buffer with the given repeating 8-bit value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.fill.i8` $target_buffer `,` $target_offset `,` $length `,` $value\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Fills an element range of the buffer with the given value, like memset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_16","title":"Operands:","text":"Operand 
Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer <code>value</code> 8-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferhash-vmbufferhashop","title":"<code>vm.buffer.hash</code> (VM::BufferHashOp)","text":"<p>Syntax:</p> <pre><code>operation ::= `vm.buffer.hash` $source_buffer `,` $source_offset `,` $length\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Computes the SipHash-2-4 of the source buffer at the given offset for |length| bytes using seed <code>0x0001020304...0e0f</code>.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_17","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer <code>length</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_9","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferlength-vmbufferlengthop","title":"<code>vm.buffer.length</code> (VM::BufferLengthOp)","text":"<p>Returns the byte length of a buffer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.length` operands attr-dict `:` type($buffer) `-&gt;` type($result)\n</code></pre> <p>Returns the total byte length of the given buffer. 
This is the exact value as specified during buffer allocation though the underlying system buffer may have additional padding.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_18","title":"Operands:","text":"Operand Description <code>buffer</code> ref"},{"location":"reference/mlir-dialects/VM/#results_10","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf32-vmbufferloadf32op","title":"<code>vm.buffer.load.f32</code> (VM::BufferLoadF32Op)","text":"<p>32-bit floating-point load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.f32` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_19","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_11","title":"Results:","text":"Result Description <code>result</code> 32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf64-vmbufferloadf64op","title":"<code>vm.buffer.load.f64</code> (VM::BufferLoadF64Op)","text":"<p>64-bit floating-point load</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.buffer.load.f64` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_20","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_12","title":"Results:","text":"Result Description <code>result</code> 64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16s-vmbufferloadi16sop","title":"<code>vm.buffer.load.i16.s</code> (VM::BufferLoadI16SOp)","text":"<p>Signed 16-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i16.s` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_21","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_13","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16u-vmbufferloadi16uop","title":"<code>vm.buffer.load.i16.u</code> (VM::BufferLoadI16UOp)","text":"<p>Unsigned 16-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i16.u` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_22","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_14","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi32-vmbufferloadi32op","title":"<code>vm.buffer.load.i32</code> (VM::BufferLoadI32Op)","text":"<p>32-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i32` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_23","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#results_15","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi64-vmbufferloadi64op","title":"<code>vm.buffer.load.i64</code> (VM::BufferLoadI64Op)","text":"<p>64-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i64` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_24","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_16","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8s-vmbufferloadi8sop","title":"<code>vm.buffer.load.i8.s</code> (VM::BufferLoadI8SOp)","text":"<p>Signed 8-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i8.s` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on 
::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_25","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_17","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8u-vmbufferloadi8uop","title":"<code>vm.buffer.load.i8.u</code> (VM::BufferLoadI8UOp)","text":"<p>Unsigned 8-bit integer load</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.load.i8.u` $source_buffer `[` $source_offset `]`\n              attr-dict `:` type($source_buffer) `-&gt;` type($result)\n</code></pre> <p>Loads a value from the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_26","title":"Operands:","text":"Operand Description <code>source_buffer</code> ref <code>source_offset</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_18","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref32-vmbufferstoref32op","title":"<code>vm.buffer.store.f32</code> (VM::BufferStoreF32Op)","text":"<p>32-bit floating-point store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.f32` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface 
(MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_27","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref64-vmbufferstoref64op","title":"<code>vm.buffer.store.f64</code> (VM::BufferStoreF64Op)","text":"<p>64-bit floating-point store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.f64` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_28","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei16-vmbufferstorei16op","title":"<code>vm.buffer.store.i16</code> (VM::BufferStoreI16Op)","text":"<p>Unsigned 16-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i16` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_29","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei32-vmbufferstorei32op","title":"<code>vm.buffer.store.i32</code> (VM::BufferStoreI32Op)","text":"<p>32-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i32` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_30","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei64-vmbufferstorei64op","title":"<code>vm.buffer.store.i64</code> (VM::BufferStoreI64Op)","text":"<p>64-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i64` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_31","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei8-vmbufferstorei8op","title":"<code>vm.buffer.store.i8</code> (VM::BufferStoreI8Op)","text":"<p>Unsigned 8-bit integer store</p> <p>Syntax:</p> <pre><code>operation ::= `vm.buffer.store.i8` $value `,` $target_buffer `[` $target_offset `]`\n              attr-dict `:` type($value) `-&gt;` type($target_buffer)\n</code></pre> <p>Stores a value to the buffer at the given element offset.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_32","title":"Operands:","text":"Operand Description <code>target_buffer</code> ref <code>target_offset</code> 64-bit signless integer <code>value</code> 32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#casting-and-conversion-ops","title":"Casting and conversion ops","text":"<p>Casting and type conversion/emulation.</p>"},{"location":"reference/mlir-dialects/VM/#vmbitcastf32i32-vmbitcastf32i32op","title":"<code>vm.bitcast.f32.i32</code> (VM::BitcastF32I32Op)","text":"<p>Bitcast from a 32-bit float-point value to a 32-bit integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.f32.i32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect 
(MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_33","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_19","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcastf64i64-vmbitcastf64i64op","title":"<code>vm.bitcast.f64.i64</code> (VM::BitcastF64I64Op)","text":"<p>Bitcast from a 64-bit float-point value to a 64-bit integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.f64.i64` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_34","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_20","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcasti32f32-vmbitcasti32f32op","title":"<code>vm.bitcast.i32.f32</code> (VM::BitcastI32F32Op)","text":"<p>Bitcast from a 32-bit integer to a 32-bit float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.i32.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_35","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_21","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmbitcasti64f64-vmbitcasti64f64op","title":"<code>vm.bitcast.i64.f64</code> (VM::BitcastI64F64Op)","text":"<p>Bitcast from a 64-bit integer to a 64-bit float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.bitcast.i64.f64` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_36","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_22","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastanyref-vmcastanyrefop","title":"<code>vm.cast.any.ref</code> (VM::CastAnyRefOp)","text":"<p>Casts from any ref to a specific ref type</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.any.ref` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Performs a runtime cast of an opaque <code>!vm.ref&lt;?&gt;</code> to a specific <code>!vm.ref&lt;T&gt;</code> and raises an error if the operand does not match the expected type. 
Null refs can always be cast between types.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_37","title":"Operands:","text":"Operand Description <code>operand</code> ref"},{"location":"reference/mlir-dialects/VM/#results_23","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmcastf32si32-vmcastf32si32op","title":"<code>vm.cast.f32.si32</code> (VM::CastF32SI32Op)","text":"<p>Cast from a floating-point value to a signed integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.f32.si32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_38","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_24","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastf32ui32-vmcastf32ui32op","title":"<code>vm.cast.f32.ui32</code> (VM::CastF32UI32Op)","text":"<p>Cast from a floating-point value to an unsigned integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.f32.ui32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect 
(MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_39","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_25","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastrefany-vmcastrefanyop","title":"<code>vm.cast.ref.any</code> (VM::CastRefAnyOp)","text":"<p>Casts from a specific ref to any ref type</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.ref.any` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Performs a compile-time widening cast of a specific <code>!vm.ref&lt;T&gt;</code> to an opaque <code>!vm.ref&lt;?&gt;</code>.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_AssignmentOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_40","title":"Operands:","text":"Operand Description <code>operand</code> ref"},{"location":"reference/mlir-dialects/VM/#results_26","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmcastsi32f32-vmcastsi32f32op","title":"<code>vm.cast.si32.f32</code> (VM::CastSI32F32Op)","text":"<p>Cast from a signed integer to a float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.si32.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_41","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_27","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastui32f32-vmcastui32f32op","title":"<code>vm.cast.ui32.f32</code> (VM::CastUI32F32Op)","text":"<p>Cast from an unsigned integer to a float-point value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cast.ui32.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_42","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_28","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmextf32f64-vmextf32f64op","title":"<code>vm.ext.f32.f64</code> (VM::ExtF32F64Op)","text":"<p>Floating-point zero extend 32 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.f32.f64` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_43","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_29","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexti16i32s-vmexti16i32sop","title":"<code>vm.ext.i16.i32.s</code> (VM::ExtI16I32SOp)","text":"<p>Integer sign extend 16 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i32.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_44","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_30","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i32u-vmexti16i32uop","title":"<code>vm.ext.i16.i32.u</code> (VM::ExtI16I32UOp)","text":"<p>Integer zero extend 16 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i32.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_45","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#results_31","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64s-vmexti16i64sop","title":"<code>vm.ext.i16.i64.s</code> (VM::ExtI16I64SOp)","text":"<p>Integer sign extend 16 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i64.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_46","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_32","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64u-vmexti16i64uop","title":"<code>vm.ext.i16.i64.u</code> (VM::ExtI16I64UOp)","text":"<p>Integer zero extend 16 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i16.i64.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_47","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_33","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64s-vmexti32i64sop","title":"<code>vm.ext.i32.i64.s</code> 
(VM::ExtI32I64SOp)","text":"<p>Integer sign extend 32 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i32.i64.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_48","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_34","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64u-vmexti32i64uop","title":"<code>vm.ext.i32.i64.u</code> (VM::ExtI32I64UOp)","text":"<p>Integer zero extend 32 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i32.i64.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_49","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_35","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32s-vmexti8i32sop","title":"<code>vm.ext.i8.i32.s</code> (VM::ExtI8I32SOp)","text":"<p>Integer sign extend 8 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i32.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> 
<p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_50","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_36","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32u-vmexti8i32uop","title":"<code>vm.ext.i8.i32.u</code> (VM::ExtI8I32UOp)","text":"<p>Integer zero extend 8 bits to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i32.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_51","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_37","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64s-vmexti8i64sop","title":"<code>vm.ext.i8.i64.s</code> (VM::ExtI8I64SOp)","text":"<p>Integer sign extend 8 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i64.s` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_52","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_38","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64u-vmexti8i64uop","title":"<code>vm.ext.i8.i64.u</code> (VM::ExtI8I64UOp)","text":"<p>Integer zero extend 8 bits to 64 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ext.i8.i64.u` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_53","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_39","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtruncf64f32-vmtruncf64f32op","title":"<code>vm.trunc.f64.f32</code> (VM::TruncF64F32Op)","text":"<p>Floating-point truncate to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.f64.f32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_54","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_40","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtrunci16i8-vmtrunci16i8op","title":"<code>vm.trunc.i16.i8</code> (VM::TruncI16I8Op)","text":"<p>Integer truncate to 8 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i16.i8` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_55","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_41","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i16-vmtrunci32i16op","title":"<code>vm.trunc.i32.i16</code> (VM::TruncI32I16Op)","text":"<p>Integer truncate to 16 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i32.i16` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_56","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_42","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i8-vmtrunci32i8op","title":"<code>vm.trunc.i32.i8</code> 
(VM::TruncI32I8Op)","text":"<p>Integer truncate to 8 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i32.i8` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_57","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_43","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i16-vmtrunci64i16op","title":"<code>vm.trunc.i64.i16</code> (VM::TruncI64I16Op)","text":"<p>Integer truncate to 16 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i64.i16` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_58","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_44","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i32-vmtrunci64i32op","title":"<code>vm.trunc.i64.i32</code> (VM::TruncI64I32Op)","text":"<p>Integer truncate to 32 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i64.i32` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> 
<p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_59","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_45","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i8-vmtrunci64i8op","title":"<code>vm.trunc.i64.i8</code> (VM::TruncI64I8Op)","text":"<p>Integer truncate to 8 bits</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trunc.i64.i8` $operand attr-dict `:` type($operand) `-&gt;` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_60","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_46","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#comparison-ops","title":"Comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqi32-vmcmpeqi32op","title":"<code>vm.cmp.eq.i32</code> (VM::CmpEQI32Op)","text":"<p>Integer equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.i32` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_61","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_47","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqi64-vmcmpeqi64op","title":"<code>vm.cmp.eq.i64</code> (VM::CmpEQI64Op)","text":"<p>Integer equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.i64` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_62","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_48","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32s-vmcmpgtei32sop","title":"<code>vm.cmp.gte.i32.s</code> (VM::CmpGTEI32SOp)","text":"<p>Signed integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_63","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_49","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32u-vmcmpgtei32uop","title":"<code>vm.cmp.gte.i32.u</code> (VM::CmpGTEI32UOp)","text":"<p>Unsigned integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_64","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_50","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64s-vmcmpgtei64sop","title":"<code>vm.cmp.gte.i64.s</code> (VM::CmpGTEI64SOp)","text":"<p>Signed integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: 
<code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_65","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_51","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64u-vmcmpgtei64uop","title":"<code>vm.cmp.gte.i64.u</code> (VM::CmpGTEI64UOp)","text":"<p>Unsigned integer greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_66","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_52","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32s-vmcmpgti32sop","title":"<code>vm.cmp.gt.i32.s</code> (VM::CmpGTI32SOp)","text":"<p>Signed integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> 
<p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_67","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_53","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32u-vmcmpgti32uop","title":"<code>vm.cmp.gt.i32.u</code> (VM::CmpGTI32UOp)","text":"<p>Unsigned integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_68","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_54","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64s-vmcmpgti64sop","title":"<code>vm.cmp.gt.i64.s</code> (VM::CmpGTI64SOp)","text":"<p>Signed integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> 
<p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_69","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_55","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64u-vmcmpgti64uop","title":"<code>vm.cmp.gt.i64.u</code> (VM::CmpGTI64UOp)","text":"<p>Unsigned integer greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_70","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_56","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32s-vmcmpltei32sop","title":"<code>vm.cmp.lte.i32.s</code> (VM::CmpLTEI32SOp)","text":"<p>Signed integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, 
<code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_71","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_57","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32u-vmcmpltei32uop","title":"<code>vm.cmp.lte.i32.u</code> (VM::CmpLTEI32UOp)","text":"<p>Unsigned integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_72","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_58","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64s-vmcmpltei64sop","title":"<code>vm.cmp.lte.i64.s</code> (VM::CmpLTEI64SOp)","text":"<p>Signed integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_73","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_59","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64u-vmcmpltei64uop","title":"<code>vm.cmp.lte.i64.u</code> (VM::CmpLTEI64UOp)","text":"<p>Unsigned integer less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_74","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_60","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32s-vmcmplti32sop","title":"<code>vm.cmp.lt.i32.s</code> (VM::CmpLTI32SOp)","text":"<p>Signed integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i32.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> 
<p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_75","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_61","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32u-vmcmplti32uop","title":"<code>vm.cmp.lt.i32.u</code> (VM::CmpLTI32UOp)","text":"<p>Unsigned integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_76","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_62","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64s-vmcmplti64sop","title":"<code>vm.cmp.lt.i64.s</code> (VM::CmpLTI64SOp)","text":"<p>Signed integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i64.s` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> 
<p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_77","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_63","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64u-vmcmplti64uop","title":"<code>vm.cmp.lt.i64.u</code> (VM::CmpLTI64UOp)","text":"<p>Unsigned integer less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.i64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_78","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_64","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei32-vmcmpnei32op","title":"<code>vm.cmp.ne.i32</code> (VM::CmpNEI32Op)","text":"<p>Integer inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.i32` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_79","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_65","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei64-vmcmpnei64op","title":"<code>vm.cmp.ne.i64</code> (VM::CmpNEI64Op)","text":"<p>Integer inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.i64` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_80","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_66","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi32-vmcmpnzi32op","title":"<code>vm.cmp.nz.i32</code> (VM::CmpNZI32Op)","text":"<p>Integer non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.i32` $operand attr-dict `:` type($operand)\n</code></pre> <p>Compares the given integer operand for a 
non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_81","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_67","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi64-vmcmpnzi64op","title":"<code>vm.cmp.nz.i64</code> (VM::CmpNZI64Op)","text":"<p>Integer non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.i64` $operand attr-dict `:` type($operand)\n</code></pre> <p>Compares the given integer operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_82","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_68","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#conditional-assignment-ops","title":"Conditional assignment ops","text":""},{"location":"reference/mlir-dialects/VM/#vmselectf32-vmselectf32op","title":"<code>vm.select.f32</code> (VM::SelectF32Op)","text":"<p>Floating-point select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value 
based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_83","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 32-bit float <code>false_value</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_69","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmselectf64-vmselectf64op","title":"<code>vm.select.f64</code> (VM::SelectF64Op)","text":"<p>Floating-point select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. 
For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_84","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 64-bit float <code>false_value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_70","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmselecti32-vmselecti32op","title":"<code>vm.select.i32</code> (VM::SelectI32Op)","text":"<p>Integer select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. 
For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_85","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 32-bit signless integer <code>false_value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_71","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselecti64-vmselecti64op","title":"<code>vm.select.i64</code> (VM::SelectI64Op)","text":"<p>Integer select operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.select.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen. The true and false values must have the same types. 
For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:</p> <pre><code>%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_86","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> 64-bit signless integer <code>false_value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_72","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselectref-vmselectrefop","title":"<code>vm.select.ref</code> (VM::SelectRefOp)","text":"<p>Ref select operation <p>Syntax:</p> <pre><code>operation ::= `vm.select.ref` operands attr-dict `:` type($result)\n</code></pre> <p>Chooses one value based on a binary condition supplied as its first operand. 
If the value of the condition is true the <code>true_value</code> operand is chosen, otherwise the <code>false_value</code> operand is chosen.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_87","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>true_value</code> ref <code>false_value</code> ref"},{"location":"reference/mlir-dialects/VM/#results_73","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmswitchf32-vmswitchf32op","title":"<code>vm.switch.f32</code> (VM::SwitchF32Op)","text":"<p>Floating-point switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.f32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.f32 %index[%c100, %c200, %c300] else %c5 : f32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_88","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> 32-bit float <code>values</code> variadic of 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_74","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchf64-vmswitchf64op","title":"<code>vm.switch.f64</code> (VM::SwitchF64Op)","text":"<p>Floating-point switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.f64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.f32 %index[%c100, %c200, %c300] else %c5 : f32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_89","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> 64-bit float <code>values</code> variadic of 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_75","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchi32-vmswitchi32op","title":"<code>vm.switch.i32</code> (VM::SwitchI32Op)","text":"<p>Integer switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.i32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of 
%c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.i32 %index[%c100, %c200, %c300] else %c5 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_90","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> 32-bit signless integer <code>values</code> variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_76","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchi64-vmswitchi64op","title":"<code>vm.switch.i64</code> (VM::SwitchI64Op)","text":"<p>Integer switch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.switch.i64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n</code></pre> <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (&lt;0 or &gt;2) then default to %c5.\n%0 = vm.switch.i32 %index[%c100, %c200, %c300] else %c5 : i32\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_91","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer 
<code>default_value</code> 64-bit signless integer <code>values</code> variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_77","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchref-vmswitchrefop","title":"<code>vm.switch.ref</code> (VM::SwitchRefOp)","text":"<p>Ref switch operation <p>Returns the value with the given <code>index</code> in <code>values</code> or <code>default_value</code> if the index is out of bounds.</p> <pre><code>// Switch %arg0 to cases of %r0/%r1/%r2 if arg0==0, ==1, ==2.\n// If %arg0 is out of range (&lt;0 or &gt;2) then default to %null.\n%0 = vm.switch.ref %index[%r0, %r1, %r2] else %null : vm.ref&lt;!foo&gt;\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_92","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>default_value</code> ref <code>values</code> variadic of ref"},{"location":"reference/mlir-dialects/VM/#results_78","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#constant-ops","title":"Constant ops","text":""},{"location":"reference/mlir-dialects/VM/#vmconstf32-vmconstf32op","title":"<code>vm.const.f32</code> (VM::ConstF32Op)","text":"<p>32-bit floating-point constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f32` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>FloatAttr32-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_79","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf32zero-vmconstf32zeroop","title":"<code>vm.const.f32.zero</code> (VM::ConstF32ZeroOp)","text":"<p>32-bit floating-point constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f32.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_80","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64-vmconstf64op","title":"<code>vm.const.f64</code> (VM::ConstF64Op)","text":"<p>64-bit floating-point constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f64` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>FloatAttr64-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_81","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64zero-vmconstf64zeroop","title":"<code>vm.const.f64.zero</code> (VM::ConstF64ZeroOp)","text":"<p>64-bit floating-point constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.f64.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_82","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconsti32-vmconsti32op","title":"<code>vm.const.i32</code> (VM::ConstI32Op)","text":"<p>32-bit integer constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i32` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription 
<code>value</code>IntegerAttr32-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_83","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti32zero-vmconsti32zeroop","title":"<code>vm.const.i32.zero</code> (VM::ConstI32ZeroOp)","text":"<p>32-bit integer constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i32.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_84","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64-vmconsti64op","title":"<code>vm.const.i64</code> (VM::ConstI64Op)","text":"<p>64-bit integer constant operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i64` $value attr-dict\n</code></pre> <p>Defines a constant value that is treated as a scalar literal at runtime.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>value</code>IntegerAttr64-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_85","title":"Results:","text":"Result Description <code>result</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64zero-vmconsti64zeroop","title":"<code>vm.const.i64.zero</code> (VM::ConstI64ZeroOp)","text":"<p>64-bit integer constant zero operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.i64.zero` attr-dict\n</code></pre> <p>Defines a constant zero primitive.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_86","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconstrefrodata-vmconstrefrodataop","title":"<code>vm.const.ref.rodata</code> (VM::ConstRefRodataOp)","text":"<p>Constant rodata access operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.const.ref.rodata` $rodata attr-dict `:` type($value)\n</code></pre> <p>Returns a reference to a read-only buffer.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>rodata</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_87","title":"Results:","text":"Result Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmconstrefzero-vmconstrefzeroop","title":"<code>vm.const.ref.zero</code> (VM::ConstRefZeroOp)","text":"<p>Null ref constant operation</p> <p>Syntax:</p> 
<pre><code>operation ::= `vm.const.ref.zero` `:` type($result) attr-dict\n</code></pre> <p>Defines a constant null ref that can be used in comparisons and initialization.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ConstantLike</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#results_88","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmrodatainline-vmrodatainlineop","title":"<code>vm.rodata.inline</code> (VM::RodataInlineOp)","text":"<p>Inlined constant rodata</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rodata.inline` ($name^)? attr-dict `:` type($result) `=` $value\n</code></pre> <p>vm.rodata that can be embedded inline in functions. See vm.rodata for more information.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::Attributebuffer-like constant attribute values <code>alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#results_89","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmrodata-vmrodataop","title":"<code>vm.rodata</code> (VM::RodataOp)","text":"<p>Read-only data definition operation</p> <p>Syntax:</p> 
<pre><code>operation ::= `vm.rodata` custom&lt;SymbolVisibility&gt;($sym_visibility) $sym_name attr-dict $value\n</code></pre> <p>Defines a blob of read-only constant data that can be represented as a ref. This can be used to store arbitrary data within modules such as large constant buffers and other file contents.</p> <p>Note that the data is reference counted as a way to track its usage once the value leaves the module. For example, returning rodata from an exported function must keep the data (possibly backed by mmap) valid for its entire lifetime.</p> <p>By default all rodata will be aligned in the final module output at a 16-byte granularity. An optional alignment can be specified to override the default for cases where larger or smaller alignments are needed.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>value</code>::mlir::Attributebuffer-like constant attribute values <code>alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>ordinal</code>::mlir::IntegerAttrordinal value <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#vmrodatatableinline-vmrodatatableinlineop","title":"<code>vm.rodata.table.inline</code> (VM::RodataTableInlineOp)","text":"<p>Inlined constant rodata table</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rodata.table.inline` $table_type attr-dict `:` type($table_result) `,` type($data_result) `=` $data_array\n</code></pre> <p>vm.rodata with another associated vm.rodata table specifying byte offsets and sizes as a subview into the flattened data. 
The table is a flat array of 32 or 64-bit integers storing (offset, size) in element order.</p> <p>The optional alignment attribute applies to both the table and data rodata. The data_alignment attribute can be used to specify an alignment for the elements of the table, padding to the data alignment with zeros. The element sizes reflect the unpadded attribute storage sizes.</p> <p>See vm.rodata for more information.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>table_name</code>::mlir::StringAttrstring attribute <code>data_name</code>::mlir::StringAttrstring attribute <code>table_type</code>::mlir::TypeAttrtype attribute of 32/64-bit integer <code>data_array</code>::mlir::ArrayAttrarray attribute of serializable attributes <code>alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>data_alignment</code>::mlir::IntegerAttr64-bit signless integer attribute <code>mime_type</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#results_90","title":"Results:","text":"Result Description <code>table_result</code> ref <code>data_result</code> ref"},{"location":"reference/mlir-dialects/VM/#control-flow-ops","title":"Control flow ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbr-vmbranchop","title":"<code>vm.br</code> (VM::BranchOp)","text":"<p>Unconditional branch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.br` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? 
attr-dict\n</code></pre> <p>Represents an unconditional branch operation that branches to a target block  with the given set of arguments.</p> <p><code>^bb0(...):    vm.br ^bb1(%a)  ^bb1(%blockArg1):    ...</code></p> <p>Traits: <code>Terminator</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_93","title":"Operands:","text":"Operand Description <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_1","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmbr_table-vmbranchtableop","title":"<code>vm.br_table</code> (VM::BranchTableOp)","text":"<p>Branch table operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.br_table` $index ` ` `{` `\\n`\n              custom&lt;BranchTableCases&gt;(\n              $defaultDestination, $defaultOperands, type($defaultOperands),\n              $caseDestinations, $caseOperands, type($caseOperands))\n              `}`\n              attr-dict\n</code></pre> <p>Represents a branch table instructing execution to branch to the block with  the specified index. 
If the index is out of bounds then execution will  branch to the default block.</p> <p><code>vm.br_table %index {    default: ^bb1(%a : i64),    0: ^bb2,    1: ^bb3(%c : i64)  }</code></p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>AttrSizedOperandSegments</code>, <code>Terminator</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>case_operand_segments</code>::mlir::DenseI32ArrayAttri32 dense array attribute"},{"location":"reference/mlir-dialects/VM/#operands_94","title":"Operands:","text":"Operand Description <code>index</code> 32-bit signless integer <code>defaultOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>caseOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_2","title":"Successors:","text":"Successor Description <code>defaultDestination</code> any successor <code>caseDestinations</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmcall-vmcallop","title":"<code>vm.call</code> (VM::CallOp)","text":"<p>Call operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.call` $callee `(` operands `)` attr-dict `:` functional-type(operands, results)\n</code></pre> <p>Calls an internal VM function with the given arguments.</p> <p>Interfaces: <code>CallOpInterface</code>, <code>MemoryEffectOpInterface</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_95","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_91","title":"Results:","text":"Result Description <code>results</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcallvariadic-vmcallvariadicop","title":"<code>vm.call.variadic</code> (VM::CallVariadicOp)","text":"<p>Call operation with variadic arguments</p> <p>Calls an internal VM function with the given arguments. One or more of the arguments may be variadic, encoded as segmented sized operand lists.</p> <p>Variadic arguments must be specified with a total count in the segment_sizes attribute.</p> <p>Interfaces: <code>CallOpInterface</code>, <code>MemoryEffectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>callee</code>FlatSymbolRefAttrsymbol reference attribute <code>segment_sizes</code>::mlir::DenseIntElementsAttr16-bit signless integer elements attribute <code>segment_types</code>::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/VM/#operands_96","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_92","title":"Results:","text":"Result Description 
<code>results</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckeq-vmcheckeqop","title":"<code>vm.check.eq</code> (VM::CheckEQOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>Commutative</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_97","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>rhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckne-vmcheckneop","title":"<code>vm.check.ne</code> (VM::CheckNEOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.ne` $lhs `,` $rhs (`,` $message^)? 
attr-dict `:` type($lhs)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>Commutative</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_98","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>rhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknz-vmchecknzop","title":"<code>vm.check.nz</code> (VM::CheckNZOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.nz` $value (`,` $message^)? attr-dict `:` type($value)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. 
The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_99","title":"Operands:","text":"Operand Description <code>value</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknearly_eq-vmchecknearlyeqop","title":"<code>vm.check.nearly_eq</code> (VM::CheckNearlyEQOp)","text":"<p>Raises a global failure if the condition is true</p> <p>Syntax:</p> <pre><code>operation ::= `vm.check.nearly_eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n</code></pre> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. 
The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.</p> <pre><code>vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref&lt;?&gt;\n</code></pre> <p>Traits: <code>Commutative</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_100","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>rhs</code> 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcond_br-vmcondbranchop","title":"<code>vm.cond_br</code> (VM::CondBranchOp)","text":"<p>Conditional branch operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cond_br` $condition `,`\n              $trueDest (`(` $trueDestOperands^ `:` type($trueDestOperands) `)`)? `,`\n              $falseDest (`(` $falseDestOperands^ `:` type($falseDestOperands) `)`)?\n              attr-dict\n</code></pre> <p>Represents a conditional branch operation that branches to one of the two  target blocks with the given set of arguments.</p> <p><code>^bb0(...):    vm.cond_br %condition, ^bb1(%a), ^bb2(%b)  ^bb1(%blockArg1):    ...  
^bb2(%blockArg2):    ...</code></p> <p>Traits: <code>AttrSizedOperandSegments</code>, <code>Terminator</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_101","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>trueDestOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref <code>falseDestOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_3","title":"Successors:","text":"Successor Description <code>trueDest</code> any successor <code>falseDest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_fail-vmcondfailop","title":"<code>vm.cond_fail</code> (VM::CondFailOp)","text":"<p>Raises a global failure if the condition is true</p> <p>When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. 
It's also possible that other work that is in-flight or pending when the failure occurs will complete.</p> <p>This is implemented as a pseudo-op that transforms into a vm.fail operation guarded by the condition.</p> <pre><code>%nz = vm.cmp.nz.i32 %value : i32\n%statusCode = vm.const.i32 9\nvm.cond_fail %nz, %statusCode, \"expected non-zero\"\n</code></pre> <p>Traits: <code>VM_PseudoOp</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_102","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfail-vmfailop","title":"<code>vm.fail</code> (VM::FailOp)","text":"<p>Raises a global failure</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fail` $status (`,` $message^)? attr-dict\n</code></pre> <p>Signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.</p> <p>As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. 
It's also possible that other work that is in-flight or pending when the failure occurs will complete.</p> <pre><code>%statusCode = vm.const.i32 9\nvm.fail %statusCode, \"oh no!\"\n</code></pre> <p>Traits: <code>Terminator</code></p> <p>Interfaces: <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_103","title":"Operands:","text":"Operand Description <code>status</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmimportresolved-vmimportresolvedop","title":"<code>vm.import.resolved</code> (VM::ImportResolvedOp)","text":"<p>Returns true if an optional import was resolved at runtime</p> <p>Syntax:</p> <pre><code>operation ::= `vm.import.resolved` $import attr-dict `:` type($result)\n</code></pre> <p>Allows for checking whether an optional import was resolved at runtime. 
If this returns false then attempting to call the imported function will result in a failure at runtime.</p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>import</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_93","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmreturn-vmreturnop","title":"<code>vm.return</code> (VM::ReturnOp)","text":"<p>Return operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.return` attr-dict ($operands^ `:` type($operands))?\n</code></pre> <p>Represents a return operation within a function.</p> <pre><code>vm.func @foo(%0: i32, %1: f8) -&gt; (i32, f8) {\n  vm.return %0, %1 : i32, f8\n}\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>ReturnLike</code>, <code>Terminator</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>RegionBranchTerminatorOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_104","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#debugging-ops","title":"Debugging ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbreak-vmbreakop","title":"<code>vm.break</code> (VM::BreakOp)","text":"<p>Unconditional debug break operation</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.break` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n</code></pre> <p>Breaks into the attached debugger or asks for attaching a debugger. After resuming (or if a debugger is not attached) execution will continue at the target block.</p> <p>Traits: <code>Terminator</code>, <code>Util_YieldPoint</code>, <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_105","title":"Operands:","text":"Operand Description <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_4","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_break-vmcondbreakop","title":"<code>vm.cond_break</code> (VM::CondBreakOp)","text":"<p>Conditional debug break operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cond_break` $condition `,` $dest (`(` $destOperands^ `:` type($destOperands) `)`)?\n              attr-dict\n</code></pre> <p>Breaks into the attached debugger or asks for attaching a debugger if the provided condition is true. 
After resuming (or if a debugger is not attached) execution will continue at the target block.</p> <p>Traits: <code>Terminator</code>, <code>Util_YieldPoint</code>, <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>BranchOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_106","title":"Operands:","text":"Operand Description <code>condition</code> 32-bit signless integer <code>destOperands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_5","title":"Successors:","text":"Successor Description <code>dest</code> any successor"},{"location":"reference/mlir-dialects/VM/#vmprint-vmprintop","title":"<code>vm.print</code> (VM::PrintOp)","text":"<p>Message printing operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.print` $message `(` operands `)` attr-dict `:` type(operands)\n</code></pre> <p>Prints the given string message and zero or more values.</p> <p>Traits: <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>message</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_107","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmtrace-vmtraceop","title":"<code>vm.trace</code> (VM::TraceOp)","text":"<p>Trace value(s) operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.trace` $event_name `(` operands `)` attr-dict `:` type(operands)\n</code></pre> <p>Traces one or more values at 
the time the operation is executed. These values will be encoded into the active trace depending on the active trace verbosity setting.</p> <p>Traits: <code>VM_DebugOnly</code>, <code>VM_FullBarrier</code></p> <p>Interfaces: <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>event_name</code>::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_108","title":"Operands:","text":"Operand Description <code>operands</code> variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#floating-point-arithmetic-ops","title":"Floating-point arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsf32-vmabsf32op","title":"<code>vm.abs.f32</code> (VM::AbsF32Op)","text":"<p>Floating point absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_109","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_94","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmabsf64-vmabsf64op","title":"<code>vm.abs.f64</code> (VM::AbsF64Op)","text":"<p>Floating point absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_110","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_95","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf32-vmaddf32op","title":"<code>vm.add.f32</code> (VM::AddF32Op)","text":"<p>Floating-point add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_111","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_96","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf64-vmaddf64op","title":"<code>vm.add.f64</code> (VM::AddF64Op)","text":"<p>Floating-point add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_112","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_97","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf32-vmceilf32op","title":"<code>vm.ceil.f32</code> (VM::CeilF32Op)","text":"<p>Floating point ceiling operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ceil.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_113","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_98","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf64-vmceilf64op","title":"<code>vm.ceil.f64</code> (VM::CeilF64Op)","text":"<p>Floating point ceiling operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ceil.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_114","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_99","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf32-vmdivf32op","title":"<code>vm.div.f32</code> (VM::DivF32Op)","text":"<p>Floating point division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_115","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_100","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf64-vmdivf64op","title":"<code>vm.div.f64</code> (VM::DivF64Op)","text":"<p>Floating point division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_116","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_101","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf32-vmfmaf32op","title":"<code>vm.fma.f32</code> 
(VM::FMAF32Op)","text":"<p>Floating point fused multiply-add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_117","title":"Operands:","text":"Operand Description <code>a</code> 32-bit float <code>b</code> 32-bit float <code>c</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_102","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf64-vmfmaf64op","title":"<code>vm.fma.f64</code> (VM::FMAF64Op)","text":"<p>Floating point fused multiply-add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_118","title":"Operands:","text":"Operand Description <code>a</code> 64-bit float <code>b</code> 64-bit float <code>c</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_103","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf32-vmfloorf32op","title":"<code>vm.floor.f32</code> (VM::FloorF32Op)","text":"<p>Floating point floor operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.floor.f32` $operand attr-dict `:` 
type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_119","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_104","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf64-vmfloorf64op","title":"<code>vm.floor.f64</code> (VM::FloorF64Op)","text":"<p>Floating point floor operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.floor.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_120","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_105","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf32-vmmaxf32op","title":"<code>vm.max.f32</code> (VM::MaxF32Op)","text":"<p>Floating point maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_121","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_106","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf64-vmmaxf64op","title":"<code>vm.max.f64</code> (VM::MaxF64Op)","text":"<p>Floating point maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_122","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_107","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf32-vmminf32op","title":"<code>vm.min.f32</code> (VM::MinF32Op)","text":"<p>Floating point minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_123","title":"Operands:","text":"Operand Description 
<code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_108","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf64-vmminf64op","title":"<code>vm.min.f64</code> (VM::MinF64Op)","text":"<p>Floating point minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_124","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_109","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmulf32-vmmulf32op","title":"<code>vm.mul.f32</code> (VM::MulF32Op)","text":"<p>Floating point multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_125","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_110","title":"Results:","text":"Result Description <code>result</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmmulf64-vmmulf64op","title":"<code>vm.mul.f64</code> (VM::MulF64Op)","text":"<p>Floating point multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_126","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_111","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf32-vmnegf32op","title":"<code>vm.neg.f32</code> (VM::NegF32Op)","text":"<p>Floating point negation operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.neg.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_127","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_112","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf64-vmnegf64op","title":"<code>vm.neg.f64</code> (VM::NegF64Op)","text":"<p>Floating point negation operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.neg.f64` $operand attr-dict `:` 
type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_128","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_113","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf32-vmremf32op","title":"<code>vm.rem.f32</code> (VM::RemF32Op)","text":"<p>Floating point remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_129","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_114","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf64-vmremf64op","title":"<code>vm.rem.f64</code> (VM::RemF64Op)","text":"<p>Floating point remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_130","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_115","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32even-vmroundf32evenop","title":"<code>vm.round.f32.even</code> (VM::RoundF32EvenOp)","text":"<p>Rounds the value to the nearest even integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f32.even` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_131","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_116","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32-vmroundf32op","title":"<code>vm.round.f32</code> (VM::RoundF32Op)","text":"<p>Rounds the value to the nearest integer away from zero</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_132","title":"Operands:","text":"Operand Description 
<code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_117","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64even-vmroundf64evenop","title":"<code>vm.round.f64.even</code> (VM::RoundF64EvenOp)","text":"<p>Rounds the value to the nearest even integer</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f64.even` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_133","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_118","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64-vmroundf64op","title":"<code>vm.round.f64</code> (VM::RoundF64Op)","text":"<p>Rounds the value to the nearest integer away from zero</p> <p>Syntax:</p> <pre><code>operation ::= `vm.round.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_134","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_119","title":"Results:","text":"Result Description <code>result</code> 64-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmsubf32-vmsubf32op","title":"<code>vm.sub.f32</code> (VM::SubF32Op)","text":"<p>Floating point subtraction operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_135","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_120","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsubf64-vmsubf64op","title":"<code>vm.sub.f64</code> (VM::SubF64Op)","text":"<p>Floating point subtraction operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_136","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_121","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#floating-point-comparison-ops","title":"Floating-point comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32near-vmcmpeqf32nearop","title":"<code>vm.cmp.eq.f32.near</code> 
(VM::CmpEQF32NearOp)","text":"<p>Near floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f32.near` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_137","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_122","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32o-vmcmpeqf32oop","title":"<code>vm.cmp.eq.f32.o</code> (VM::CmpEQF32OOp)","text":"<p>Ordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_138","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_123","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32u-vmcmpeqf32uop","title":"<code>vm.cmp.eq.f32.u</code> (VM::CmpEQF32UOp)","text":"<p>Unordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_139","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_124","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64near-vmcmpeqf64nearop","title":"<code>vm.cmp.eq.f64.near</code> (VM::CmpEQF64NearOp)","text":"<p>Near floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f64.near` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_140","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_125","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64o-vmcmpeqf64oop","title":"<code>vm.cmp.eq.f64.o</code> (VM::CmpEQF64OOp)","text":"<p>Ordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_141","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_126","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64u-vmcmpeqf64uop","title":"<code>vm.cmp.eq.f64.u</code> (VM::CmpEQF64UOp)","text":"<p>Unordered floating-point equality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_142","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_127","title":"Results:","text":"Result 
Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32o-vmcmpgtef32oop","title":"<code>vm.cmp.gte.f32.o</code> (VM::CmpGTEF32OOp)","text":"<p>Ordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_143","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_128","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32u-vmcmpgtef32uop","title":"<code>vm.cmp.gte.f32.u</code> (VM::CmpGTEF32UOp)","text":"<p>Unordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_144","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_129","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64o-vmcmpgtef64oop","title":"<code>vm.cmp.gte.f64.o</code> (VM::CmpGTEF64OOp)","text":"<p>Ordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_145","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_130","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64u-vmcmpgtef64uop","title":"<code>vm.cmp.gte.f64.u</code> (VM::CmpGTEF64UOp)","text":"<p>Unordered floating-point greater-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gte.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_146","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_131","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32o-vmcmpgtf32oop","title":"<code>vm.cmp.gt.f32.o</code> (VM::CmpGTF32OOp)","text":"<p>Ordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_147","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_132","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32u-vmcmpgtf32uop","title":"<code>vm.cmp.gt.f32.u</code> (VM::CmpGTF32UOp)","text":"<p>Unordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_148","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_133","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64o-vmcmpgtf64oop","title":"<code>vm.cmp.gt.f64.o</code> (VM::CmpGTF64OOp)","text":"<p>Ordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_149","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_134","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64u-vmcmpgtf64uop","title":"<code>vm.cmp.gt.f64.u</code> (VM::CmpGTF64UOp)","text":"<p>Unordered floating-point greater-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.gt.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_150","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_135","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32o-vmcmpltef32oop","title":"<code>vm.cmp.lte.f32.o</code> (VM::CmpLTEF32OOp)","text":"<p>Ordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_151","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_136","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32u-vmcmpltef32uop","title":"<code>vm.cmp.lte.f32.u</code> (VM::CmpLTEF32UOp)","text":"<p>Unordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_152","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_137","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64o-vmcmpltef64oop","title":"<code>vm.cmp.lte.f64.o</code> (VM::CmpLTEF64OOp)","text":"<p>Ordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_153","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_138","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64u-vmcmpltef64uop","title":"<code>vm.cmp.lte.f64.u</code> (VM::CmpLTEF64UOp)","text":"<p>Unordered floating-point less-than-or-equal comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lte.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_154","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_139","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32o-vmcmpltf32oop","title":"<code>vm.cmp.lt.f32.o</code> (VM::CmpLTF32OOp)","text":"<p>Ordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_155","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_140","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32u-vmcmpltf32uop","title":"<code>vm.cmp.lt.f32.u</code> (VM::CmpLTF32UOp)","text":"<p>Unordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_156","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_141","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64o-vmcmpltf64oop","title":"<code>vm.cmp.lt.f64.o</code> (VM::CmpLTF64OOp)","text":"<p>Ordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_157","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_142","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64u-vmcmpltf64uop","title":"<code>vm.cmp.lt.f64.u</code> (VM::CmpLTF64UOp)","text":"<p>Unordered floating-point less-than comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.lt.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_158","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_143","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32o-vmcmpnef32oop","title":"<code>vm.cmp.ne.f32.o</code> (VM::CmpNEF32OOp)","text":"<p>Ordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f32.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_159","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_144","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32u-vmcmpnef32uop","title":"<code>vm.cmp.ne.f32.u</code> (VM::CmpNEF32UOp)","text":"<p>Unordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f32.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_160","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_145","title":"Results:","text":"Result Description <code>result</code> 
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64o-vmcmpnef64oop","title":"<code>vm.cmp.ne.f64.o</code> (VM::CmpNEF64OOp)","text":"<p>Ordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f64.o` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_161","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_146","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64u-vmcmpnef64uop","title":"<code>vm.cmp.ne.f64.u</code> (VM::CmpNEF64UOp)","text":"<p>Unordered floating-point inequality comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.f64.u` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_162","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_147","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32o-vmcmpnzf32oop","title":"<code>vm.cmp.nz.f32.o</code> (VM::CmpNZF32OOp)","text":"<p>Ordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f32.o` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_163","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_148","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32u-vmcmpnzf32uop","title":"<code>vm.cmp.nz.f32.u</code> (VM::CmpNZF32UOp)","text":"<p>Unordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f32.u` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_164","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_149","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64o-vmcmpnzf64oop","title":"<code>vm.cmp.nz.f64.o</code> (VM::CmpNZF64OOp)","text":"<p>Ordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f64.o` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_165","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_150","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64u-vmcmpnzf64uop","title":"<code>vm.cmp.nz.f64.u</code> (VM::CmpNZF64UOp)","text":"<p>Unordered floating-point non-zero comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.f64.u` operands attr-dict `:` type($operand)\n</code></pre> <p>Compares the given floating-point operand for a non-zero value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code>, <code>VM_PseudoOp</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_166","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_151","title":"Results:","text":"Result Description <code>result</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf32-vmcmpnanf32op","title":"<code>vm.cmp.nan.f32</code> (VM::CmpNaNF32Op)","text":"<p>Floating-point NaN comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nan.f32` $operand attr-dict `:` type($operand)\n</code></pre> <p>Returns 1 if the value is NaN.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_167","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_152","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf64-vmcmpnanf64op","title":"<code>vm.cmp.nan.f64</code> (VM::CmpNaNF64Op)","text":"<p>Floating-point NaN comparison operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nan.f64` $operand attr-dict `:` type($operand)\n</code></pre> <p>Returns 1 if the value is NaN.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_168","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_153","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#floating-point-math-ops","title":"Floating-point math ops","text":"<p>These map directly to the 
<code>math</code> dialect.</p>"},{"location":"reference/mlir-dialects/VM/#vmatan2f32-vmatan2f32op","title":"<code>vm.atan2.f32</code> (VM::Atan2F32Op)","text":"<p>2-argument arcus tangent of the given values</p> <p>Syntax:</p> <pre><code>operation ::= `vm.atan2.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_169","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_154","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatan2f64-vmatan2f64op","title":"<code>vm.atan2.f64</code> (VM::Atan2F64Op)","text":"<p>2-argument arcus tangent of the given values</p> <p>Syntax:</p> <pre><code>operation ::= `vm.atan2.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_170","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_155","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf32-vmatanf32op","title":"<code>vm.atan.f32</code> (VM::AtanF32Op)","text":"<p>Arcus tangent of the given value</p> <p>Syntax:</p> 
<pre><code>operation ::= `vm.atan.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_171","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_156","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf64-vmatanf64op","title":"<code>vm.atan.f64</code> (VM::AtanF64Op)","text":"<p>Arcus tangent of the given value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.atan.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_172","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_157","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf32-vmcosf32op","title":"<code>vm.cos.f32</code> (VM::CosF32Op)","text":"<p>Cosine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cos.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_173","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_158","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf64-vmcosf64op","title":"<code>vm.cos.f64</code> (VM::CosF64Op)","text":"<p>Cosine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.cos.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_174","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_159","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff32-vmerff32op","title":"<code>vm.erf.f32</code> (VM::ErfF32Op)","text":"<p>Computes the error function of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.erf.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_175","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_160","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff64-vmerff64op","title":"<code>vm.erf.f64</code> (VM::ErfF64Op)","text":"<p>Computes the error function of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.erf.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_176","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_161","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f32-vmexp2f32op","title":"<code>vm.exp2.f32</code> (VM::Exp2F32Op)","text":"<p>Base-2 exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp2.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_177","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_162","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f64-vmexp2f64op","title":"<code>vm.exp2.f64</code> (VM::Exp2F64Op)","text":"<p>Base-2 
exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp2.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_178","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_163","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf32-vmexpf32op","title":"<code>vm.exp.f32</code> (VM::ExpF32Op)","text":"<p>Base-e exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_179","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_164","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf64-vmexpf64op","title":"<code>vm.exp.f64</code> (VM::ExpF64Op)","text":"<p>Base-e exponential of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.exp.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_180","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_165","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f32-vmexpm1f32op","title":"<code>vm.expm1.f32</code> (VM::ExpM1F32Op)","text":"<p>Base-e exponential of the specified value minus 1</p> <p>Syntax:</p> <pre><code>operation ::= `vm.expm1.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_181","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_166","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f64-vmexpm1f64op","title":"<code>vm.expm1.f64</code> (VM::ExpM1F64Op)","text":"<p>Base-e exponential of the specified value minus 1</p> <p>Syntax:</p> <pre><code>operation ::= `vm.expm1.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_182","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_167","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f32-vmlog10f32op","title":"<code>vm.log10.f32</code> (VM::Log10F32Op)","text":"<p>Base-10 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log10.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_183","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_168","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f64-vmlog10f64op","title":"<code>vm.log10.f64</code> (VM::Log10F64Op)","text":"<p>Base-10 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log10.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_184","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_169","title":"Results:","text":"Result 
Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf32-vmlog1pf32op","title":"<code>vm.log1p.f32</code> (VM::Log1pF32Op)","text":"<p>Natural logarithm of one plus the given value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log1p.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_185","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_170","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf64-vmlog1pf64op","title":"<code>vm.log1p.f64</code> (VM::Log1pF64Op)","text":"<p>Natural logarithm of one plus the given value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log1p.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_186","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_171","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f32-vmlog2f32op","title":"<code>vm.log2.f32</code> (VM::Log2F32Op)","text":"<p>Base-2 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.log2.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_187","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_172","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f64-vmlog2f64op","title":"<code>vm.log2.f64</code> (VM::Log2F64Op)","text":"<p>Base-2 logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log2.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_188","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_173","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf32-vmlogf32op","title":"<code>vm.log.f32</code> (VM::LogF32Op)","text":"<p>Base-e logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, 
<code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_189","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_174","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf64-vmlogf64op","title":"<code>vm.log.f64</code> (VM::LogF64Op)","text":"<p>Base-e logarithm of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.log.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_190","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_175","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf32-vmpowf32op","title":"<code>vm.pow.f32</code> (VM::PowF32Op)","text":"<p>Floating point raised to the power of operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.pow.f32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_191","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit float <code>rhs</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#results_176","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf64-vmpowf64op","title":"<code>vm.pow.f64</code> (VM::PowF64Op)","text":"<p>Floating point raised to the power of operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.pow.f64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_192","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit float <code>rhs</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_177","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf32-vmrsqrtf32op","title":"<code>vm.rsqrt.f32</code> (VM::RsqrtF32Op)","text":"<p>Reciprocal of sqrt (1 / sqrt of the specified value)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rsqrt.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_193","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_178","title":"Results:","text":"Result Description <code>result</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf64-vmrsqrtf64op","title":"<code>vm.rsqrt.f64</code> (VM::RsqrtF64Op)","text":"<p>Reciprocal of sqrt (1 / sqrt of the specified value)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rsqrt.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_194","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_179","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf32-vmsinf32op","title":"<code>vm.sin.f32</code> (VM::SinF32Op)","text":"<p>Sine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sin.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_195","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_180","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf64-vmsinf64op","title":"<code>vm.sin.f64</code> (VM::SinF64Op)","text":"<p>Sine of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sin.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: 
<code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_196","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_181","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf32-vmsqrtf32op","title":"<code>vm.sqrt.f32</code> (VM::SqrtF32Op)","text":"<p>Sqrt of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sqrt.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_197","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_182","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf64-vmsqrtf64op","title":"<code>vm.sqrt.f64</code> (VM::SqrtF64Op)","text":"<p>Sqrt of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sqrt.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_198","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_183","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf32-vmtanhf32op","title":"<code>vm.tanh.f32</code> (VM::TanhF32Op)","text":"<p>Hyperbolic tangent of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.tanh.f32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_199","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_184","title":"Results:","text":"Result Description <code>result</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf64-vmtanhf64op","title":"<code>vm.tanh.f64</code> (VM::TanhF64Op)","text":"<p>Hyperbolic tangent of the specified value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.tanh.f64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_200","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_185","title":"Results:","text":"Result 
Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/VM/#vmglobaladdress-vmglobaladdressop","title":"<code>vm.global.address</code> (VM::GlobalAddressOp)","text":"<p>Returns an address reference to a global</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.address` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($result)\n</code></pre> <p>Returns an indirect address reference to the given global. During export the address will be converted to the natural format of the global table (for example, ordinals for refs and byte offsets for primitive types).</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalAddressOpInterface</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_186","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer or a pointer-like reference"},{"location":"reference/mlir-dialects/VM/#vmglobalf32-vmglobalf32op","title":"<code>vm.global.f32</code> (VM::GlobalF32Op)","text":"<p>32-bit floating-point global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.f32` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code>, <code>VM_ExtF32</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>FloatAttr32-bit floating-point value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalf64-vmglobalf64op","title":"<code>vm.global.f64</code> (VM::GlobalF64Op)","text":"<p>64-bit floating-point global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.f64` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code>, <code>VM_ExtF64</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>FloatAttr64-bit floating-point value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali32-vmglobali32op","title":"<code>vm.global.i32</code> (VM::GlobalI32Op)","text":"<p>32-bit integer global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.i32` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>IntegerAttr32-bit integer value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali64-vmglobali64op","title":"<code>vm.global.i64</code> (VM::GlobalI64Op)","text":"<p>64-bit integer global declaration</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.i64` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              custom&lt;TypeOrAttr&gt;($type, $initial_value)\n</code></pre> <p>Defines a global value that is treated as a scalar literal at runtime. 
Initialized to zero unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>initial_value</code>IntegerAttr64-bit integer value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf32-vmgloballoadf32op","title":"<code>vm.global.load.f32</code> (VM::GlobalLoadF32Op)","text":"<p>Global 32-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.f32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_187","title":"Results:","text":"Result Description <code>value</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf64-vmgloballoadf64op","title":"<code>vm.global.load.f64</code> (VM::GlobalLoadF64Op)","text":"<p>Global 64-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.f64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_188","title":"Results:","text":"Result Description <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi32-vmgloballoadi32op","title":"<code>vm.global.load.i32</code> (VM::GlobalLoadI32Op)","text":"<p>Global 32-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.i32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit 
attribute"},{"location":"reference/mlir-dialects/VM/#results_189","title":"Results:","text":"Result Description <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi64-vmgloballoadi64op","title":"<code>vm.global.load.i64</code> (VM::GlobalLoadI64Op)","text":"<p>Global 64-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.i64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_190","title":"Results:","text":"Result Description <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf32-vmgloballoadindirectf32op","title":"<code>vm.global.load.indirect.f32</code> (VM::GlobalLoadIndirectF32Op)","text":"<p>Global 32-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.f32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit 
attribute"},{"location":"reference/mlir-dialects/VM/#operands_201","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;32-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#results_191","title":"Results:","text":"Result Description <code>value</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf64-vmgloballoadindirectf64op","title":"<code>vm.global.load.indirect.f64</code> (VM::GlobalLoadIndirectF64Op)","text":"<p>Global 64-bit floating-point load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.f64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_202","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;64-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#results_192","title":"Results:","text":"Result Description <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti32-vmgloballoadindirecti32op","title":"<code>vm.global.load.indirect.i32</code> (VM::GlobalLoadIndirectI32Op)","text":"<p>Global 32-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.i32` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_203","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;32-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#results_193","title":"Results:","text":"Result Description <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti64-vmgloballoadindirecti64op","title":"<code>vm.global.load.indirect.i64</code> (VM::GlobalLoadIndirectI64Op)","text":"<p>Global 64-bit integer load operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.i64` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a primitive value.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_204","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr&lt;64-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#results_194","title":"Results:","text":"Result Description <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectref-vmgloballoadindirectrefop","title":"<code>vm.global.load.indirect.ref</code> (VM::GlobalLoadIndirectRefOp)","text":"<p>Global ref load operation <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.indirect.ref` (`immutable` $is_immutable^)?\n      
        $global attr-dict `:` type($global) `-&gt;` type($value)\n</code></pre> <p>Loads the value of a global containing a ref of the given type.</p> <p>Interfaces: <code>Util_GlobalLoadIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#operands_205","title":"Operands:","text":"Operand Description <code>global</code> 32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#results_195","title":"Results:","text":"Result Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmgloballoadref-vmgloballoadrefop","title":"<code>vm.global.load.ref</code> (VM::GlobalLoadRefOp)","text":"<p>Global ref load operation <p>Syntax:</p> <pre><code>operation ::= `vm.global.load.ref` (`immutable` $is_immutable^)?\n              $global attr-dict `:` type($value)\n</code></pre> <p>Loads the value of a global containing a ref of the given type.</p> <p>Interfaces: <code>MemoryEffectOpInterface</code>, <code>OpAsmOpInterface</code>, <code>SymbolUserOpInterface</code>, <code>Util_GlobalLoadOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute <code>is_immutable</code>::mlir::UnitAttrunit attribute"},{"location":"reference/mlir-dialects/VM/#results_196","title":"Results:","text":"Result Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmglobalref-vmglobalrefop","title":"<code>vm.global.ref</code> (VM::GlobalRefOp)","text":"<p>Ref global declaration <p>Syntax:</p> <pre><code>operation ::= `vm.global.ref` custom&lt;SymbolVisibility&gt;($sym_visibility)\n           
   (`mutable` $is_mutable^)?\n              $sym_name\n              attr-dict\n              `:` $type\n</code></pre> <p>Defines a global value that is a ref of a specific type. The global will retain the ref object for the lifetime of the context or until the value is replaced with a store or reset. Initialized to null unless an initial value is specified.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>GlobalOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>type</code>::mlir::TypeAttrany type attribute <code>is_mutable</code>::mlir::UnitAttrunit attribute <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref32-vmglobalstoref32op","title":"<code>vm.global.store.f32</code> (VM::GlobalStoreF32Op)","text":"<p>Global 32-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.f32` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value value to a global.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_206","title":"Operands:","text":"Operand Description <code>value</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref64-vmglobalstoref64op","title":"<code>vm.global.store.f64</code> (VM::GlobalStoreF64Op)","text":"<p>Global 64-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.f64` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_207","title":"Operands:","text":"Operand Description <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei32-vmglobalstorei32op","title":"<code>vm.global.store.i32</code> (VM::GlobalStoreI32Op)","text":"<p>Global 32-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.i32` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_208","title":"Operands:","text":"Operand Description <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei64-vmglobalstorei64op","title":"<code>vm.global.store.i64</code> (VM::GlobalStoreI64Op)","text":"<p>Global 64-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.global.store.i64` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_209","title":"Operands:","text":"Operand Description <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf32-vmglobalstoreindirectf32op","title":"<code>vm.global.store.indirect.f32</code> (VM::GlobalStoreIndirectF32Op)","text":"<p>Global 32-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.f32` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_210","title":"Operands:","text":"Operand Description <code>value</code> 32-bit float <code>global</code> 32-bit signless integer or ptr&lt;32-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf64-vmglobalstoreindirectf64op","title":"<code>vm.global.store.indirect.f64</code> (VM::GlobalStoreIndirectF64Op)","text":"<p>Global 64-bit floating-point store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.f64` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_211","title":"Operands:","text":"Operand Description <code>value</code> 64-bit float <code>global</code> 32-bit signless integer or ptr&lt;64-bit float&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti32-vmglobalstoreindirecti32op","title":"<code>vm.global.store.indirect.i32</code> (VM::GlobalStoreIndirectI32Op)","text":"<p>Global 32-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.i32` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_212","title":"Operands:","text":"Operand Description <code>value</code> 32-bit signless integer <code>global</code> 32-bit signless integer or ptr&lt;32-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti64-vmglobalstoreindirecti64op","title":"<code>vm.global.store.indirect.i64</code> (VM::GlobalStoreIndirectI64Op)","text":"<p>Global 64-bit integer store operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.i64` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a primitive value to a global.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_213","title":"Operands:","text":"Operand Description <code>value</code> 64-bit signless integer <code>global</code> 32-bit signless integer or ptr&lt;64-bit signless integer&gt;"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectref-vmglobalstoreindirectrefop","title":"<code>vm.global.store.indirect.ref</code> 
(VM::GlobalStoreIndirectRefOp)","text":"<p>Global ref stores operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.indirect.ref` $value `,` $global attr-dict `:` type($value) `-&gt;` type($global)\n</code></pre> <p>Stores a ref to a global, retaining it until the global is reset.</p> <p>Interfaces: <code>Util_GlobalStoreIndirectOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_214","title":"Operands:","text":"Operand Description <code>value</code> ref <code>global</code> 32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreref-vmglobalstorerefop","title":"<code>vm.global.store.ref</code> (VM::GlobalStoreRefOp)","text":"<p>Global ref stores operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.global.store.ref` $value `,` $global attr-dict `:` type($value)\n</code></pre> <p>Stores a ref to a global, retaining it until the global is reset.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>Util_GlobalStoreOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>global</code>FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_215","title":"Operands:","text":"Operand Description <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#integer-arithmetic-ops","title":"Integer arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsi32-vmabsi32op","title":"<code>vm.abs.i32</code> (VM::AbsI32Op)","text":"<p>Integer absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.i32` $operand attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_216","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_197","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmabsi64-vmabsi64op","title":"<code>vm.abs.i64</code> (VM::AbsI64Op)","text":"<p>Integer absolute-value operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.abs.i64` $operand attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_217","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_198","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi32-vmaddi32op","title":"<code>vm.add.i32</code> (VM::AddI32Op)","text":"<p>Integer add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_218","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_199","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi64-vmaddi64op","title":"<code>vm.add.i64</code> 
(VM::AddI64Op)","text":"<p>Integer add operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.add.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_219","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_200","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32s-vmdivi32sop","title":"<code>vm.div.i32.s</code> (VM::DivI32SOp)","text":"<p>Signed integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_220","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_201","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32u-vmdivi32uop","title":"<code>vm.div.i32.u</code> (VM::DivI32UOp)","text":"<p>Unsigned integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_221","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_202","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64s-vmdivi64sop","title":"<code>vm.div.i64.s</code> (VM::DivI64SOp)","text":"<p>Signed integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_222","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_203","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64u-vmdivi64uop","title":"<code>vm.div.i64.u</code> (VM::DivI64UOp)","text":"<p>Unsigned integer division operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.div.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_223","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_204","title":"Results:","text":"Result Description <code>result</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmfmai32-vmfmai32op","title":"<code>vm.fma.i32</code> (VM::FMAI32Op)","text":"<p>Integer fused-multiply add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_224","title":"Operands:","text":"Operand Description <code>a</code> 32-bit signless integer <code>b</code> 32-bit signless integer <code>c</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_205","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfmai64-vmfmai64op","title":"<code>vm.fma.i64</code> (VM::FMAI64Op)","text":"<p>Integer fused-multiply add operation (a*b+c)</p> <p>Syntax:</p> <pre><code>operation ::= `vm.fma.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_225","title":"Operands:","text":"Operand Description <code>a</code> 64-bit signless integer <code>b</code> 64-bit signless integer <code>c</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_206","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32s-vmmaxi32sop","title":"<code>vm.max.i32.s</code> (VM::MaxI32SOp)","text":"<p>Signed integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: 
<code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_226","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_207","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32u-vmmaxi32uop","title":"<code>vm.max.i32.u</code> (VM::MaxI32UOp)","text":"<p>Unsigned integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_227","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_208","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64s-vmmaxi64sop","title":"<code>vm.max.i64.s</code> (VM::MaxI64SOp)","text":"<p>Signed integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_228","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_209","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64u-vmmaxi64uop","title":"<code>vm.max.i64.u</code> (VM::MaxI64UOp)","text":"<p>Unsigned integer maximum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.max.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_229","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_210","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32s-vmmini32sop","title":"<code>vm.min.i32.s</code> (VM::MinI32SOp)","text":"<p>Signed integer minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_230","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_211","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32u-vmmini32uop","title":"<code>vm.min.i32.u</code> (VM::MinI32UOp)","text":"<p>Unsigned integer minimum 
operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_231","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_212","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64s-vmmini64sop","title":"<code>vm.min.i64.s</code> (VM::MinI64SOp)","text":"<p>Signed integer minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_232","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_213","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64u-vmmini64uop","title":"<code>vm.min.i64.u</code> (VM::MinI64UOp)","text":"<p>Unsigned integer minimum operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.min.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: 
<code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_233","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_214","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli32-vmmuli32op","title":"<code>vm.mul.i32</code> (VM::MulI32Op)","text":"<p>Integer multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_234","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_215","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli64-vmmuli64op","title":"<code>vm.mul.i64</code> (VM::MulI64Op)","text":"<p>Integer multiplication operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.mul.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>Commutative</code></p> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_235","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_216","title":"Results:","text":"Result Description <code>result</code> 
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32s-vmremi32sop","title":"<code>vm.rem.i32.s</code> (VM::RemI32SOp)","text":"<p>Signed integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i32.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_236","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_217","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32u-vmremi32uop","title":"<code>vm.rem.i32.u</code> (VM::RemI32UOp)","text":"<p>Unsigned integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i32.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_237","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_218","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64s-vmremi64sop","title":"<code>vm.rem.i64.s</code> (VM::RemI64SOp)","text":"<p>Signed integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i64.s` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, 
<code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_238","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_219","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64u-vmremi64uop","title":"<code>vm.rem.i64.u</code> (VM::RemI64UOp)","text":"<p>Unsigned integer division remainder operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.rem.i64.u` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_239","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_220","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi32-vmsubi32op","title":"<code>vm.sub.i32</code> (VM::SubI32Op)","text":"<p>Integer subtract operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_240","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_221","title":"Results:","text":"Result Description 
<code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi64-vmsubi64op","title":"<code>vm.sub.i64</code> (VM::SubI64Op)","text":"<p>Integer subtract operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.sub.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Interfaces: <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_241","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_222","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#integer-bit-manipulation-ops","title":"Integer bit manipulation ops","text":""},{"location":"reference/mlir-dialects/VM/#vmandi32-vmandi32op","title":"<code>vm.and.i32</code> (VM::AndI32Op)","text":"<p>Integer binary and operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.and.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_242","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_223","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmandi64-vmandi64op","title":"<code>vm.and.i64</code> (VM::AndI64Op)","text":"<p>Integer binary and 
operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.and.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_243","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_224","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi32-vmctlzi32op","title":"<code>vm.ctlz.i32</code> (VM::CtlzI32Op)","text":"<p>Counts the leading zeros in an integer value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ctlz.i32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_244","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_225","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi64-vmctlzi64op","title":"<code>vm.ctlz.i64</code> (VM::CtlzI64Op)","text":"<p>Counts the leading zeros in an integer value</p> <p>Syntax:</p> <pre><code>operation ::= `vm.ctlz.i64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: 
<code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_245","title":"Operands:","text":"Operand Description <code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_226","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti32-vmnoti32op","title":"<code>vm.not.i32</code> (VM::NotI32Op)","text":"<p>Integer binary not operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.not.i32` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_246","title":"Operands:","text":"Operand Description <code>operand</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_227","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti64-vmnoti64op","title":"<code>vm.not.i64</code> (VM::NotI64Op)","text":"<p>Integer binary not operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.not.i64` $operand attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_247","title":"Operands:","text":"Operand Description 
<code>operand</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_228","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori32-vmori32op","title":"<code>vm.or.i32</code> (VM::OrI32Op)","text":"<p>Integer binary or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.or.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_248","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_229","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori64-vmori64op","title":"<code>vm.or.i64</code> (VM::OrI64Op)","text":"<p>Integer binary or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.or.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_249","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_230","title":"Results:","text":"Result Description <code>result</code> 64-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#vmxori32-vmxori32op","title":"<code>vm.xor.i32</code> (VM::XorI32Op)","text":"<p>Integer binary exclusive-or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.xor.i32` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_250","title":"Operands:","text":"Operand Description <code>lhs</code> 32-bit signless integer <code>rhs</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_231","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmxori64-vmxori64op","title":"<code>vm.xor.i64</code> (VM::XorI64Op)","text":"<p>Integer binary exclusive-or operation</p> <p>Syntax:</p> <pre><code>operation ::= `vm.xor.i64` operands attr-dict `:` type($result)\n</code></pre> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_251","title":"Operands:","text":"Operand Description <code>lhs</code> 64-bit signless integer <code>rhs</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_232","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#list-ops","title":"List 
ops","text":""},{"location":"reference/mlir-dialects/VM/#vmlistalloc-vmlistallocop","title":"<code>vm.list.alloc</code> (VM::ListAllocOp)","text":"<p>Allocates a new empty list</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.alloc` operands attr-dict `:` `(` type($initial_capacity) `)` `-&gt;` type($result)\n</code></pre> <p>Allocates a new typed list with a minimum initial_capacity.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_252","title":"Operands:","text":"Operand Description <code>initial_capacity</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_233","title":"Results:","text":"Result Description <code>result</code> list"},{"location":"reference/mlir-dialects/VM/#vmlistgetf32-vmlistgetf32op","title":"<code>vm.list.get.f32</code> (VM::ListGetF32Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.f32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_253","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_234","title":"Results:","text":"Result Description <code>result</code> 32-bit 
float"},{"location":"reference/mlir-dialects/VM/#vmlistgetf64-vmlistgetf64op","title":"<code>vm.list.get.f64</code> (VM::ListGetF64Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.f64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_254","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_235","title":"Results:","text":"Result Description <code>result</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistgeti32-vmlistgeti32op","title":"<code>vm.list.get.i32</code> (VM::ListGetI32Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.i32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_255","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless 
integer"},{"location":"reference/mlir-dialects/VM/#results_236","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgeti64-vmlistgeti64op","title":"<code>vm.list.get.i64</code> (VM::ListGetI64Op)","text":"<p>Primitive type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.i64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the value of the element at the given index.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_256","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_237","title":"Results:","text":"Result Description <code>result</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgetref-vmlistgetrefop","title":"<code>vm.list.get.ref</code> (VM::ListGetRefOp)","text":"<p>Ref type element accessor</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.get.ref` operands attr-dict `:` `(` type($list) `,` type($index) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the ref value of the element at the given index. 
Note that the value may be null if the element is null or the type does not match.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_257","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_238","title":"Results:","text":"Result Description <code>result</code> ref"},{"location":"reference/mlir-dialects/VM/#vmlistreserve-vmlistreserveop","title":"<code>vm.list.reserve</code> (VM::ListReserveOp)","text":"<p>Reserves capacity for list growth</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.reserve` operands attr-dict `:` `(` type($list) `,` type($minimum_capacity) `)`\n</code></pre> <p>Reserves storage for at least minimum_capacity elements. 
If the list already has at least the specified capacity the operation is ignored.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_258","title":"Operands:","text":"Operand Description <code>list</code> list <code>minimum_capacity</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistresize-vmlistresizeop","title":"<code>vm.list.resize</code> (VM::ListResizeOp)","text":"<p>Resizes the list to a new count in elements</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.resize` operands attr-dict `:` `(` type($list) `,` type($new_size) `)`\n</code></pre> <p>Resizes the list to contain new_size elements. 
This will either truncate the list if the existing size is greater than new_size or extend the list with the default list value of 0 if storing primitives and null if refs.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_259","title":"Operands:","text":"Operand Description <code>list</code> list <code>new_size</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetf32-vmlistsetf32op","title":"<code>vm.list.set.f32</code> (VM::ListSetF32Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.f32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Traits: <code>VM_ExtF32</code></p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_260","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistsetf64-vmlistsetf64op","title":"<code>vm.list.set.f64</code> (VM::ListSetF64Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.f64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Traits: <code>VM_ExtF64</code></p> <p>Interfaces: 
<code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_261","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistseti32-vmlistseti32op","title":"<code>vm.list.set.i32</code> (VM::ListSetI32Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.i32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_262","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistseti64-vmlistseti64op","title":"<code>vm.list.set.i64</code> (VM::ListSetI64Op)","text":"<p>Primitive type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.i64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new value.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on 
::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_263","title":"Operands:","text":"Operand Description <code>list</code> list&lt;8/16/32/64-bit integer or 16/32/64-bit float&gt; <code>index</code> 32-bit signless integer <code>value</code> 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetref-vmlistsetrefop","title":"<code>vm.list.set.ref</code> (VM::ListSetRefOp)","text":"<p>Ref type element mutator</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.set.ref` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n</code></pre> <p>Sets the element at the given index to the new ref value (possibly null).</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_264","title":"Operands:","text":"Operand Description <code>list</code> list <code>index</code> 32-bit signless integer <code>value</code> ref"},{"location":"reference/mlir-dialects/VM/#vmlistsize-vmlistsizeop","title":"<code>vm.list.size</code> (VM::ListSizeOp)","text":"<p>The size of the list in elements</p> <p>Syntax:</p> <pre><code>operation ::= `vm.list.size` operands attr-dict `:` `(` type($list) `)` `-&gt;` type($result)\n</code></pre> <p>Returns the current size of the list in elements.</p> <p>Interfaces: <code>MemoryEffectOpInterface (MemoryEffectOpInterface)</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_265","title":"Operands:","text":"Operand Description <code>list</code> list"},{"location":"reference/mlir-dialects/VM/#results_239","title":"Results:","text":"Result 
Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#ref-comparison-ops","title":"Ref comparison ops","text":"<p>Comparison ops for <code>vm.ref</code>.</p>"},{"location":"reference/mlir-dialects/VM/#vmcmpeqref-vmcmpeqrefop","title":"<code>vm.cmp.eq.ref</code> (VM::CmpEQRefOp)","text":"<p>Ref equality comparison operation <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.eq.ref` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_266","title":"Operands:","text":"Operand Description <code>lhs</code> ref <code>rhs</code> ref"},{"location":"reference/mlir-dialects/VM/#results_240","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpneref-vmcmpnerefop","title":"<code>vm.cmp.ne.ref</code> (VM::CmpNERefOp)","text":"<p>Ref inequality comparison operation <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.ne.ref` operands attr-dict `:` type($lhs)\n</code></pre> <p>Compares two operands with the specified predicate.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>Commutative</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_267","title":"Operands:","text":"Operand Description <code>lhs</code> ref <code>rhs</code> 
ref"},{"location":"reference/mlir-dialects/VM/#results_241","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzref-vmcmpnzrefop","title":"<code>vm.cmp.nz.ref</code> (VM::CmpNZRefOp)","text":"<p>Ref non-zero comparison operation <p>Syntax:</p> <pre><code>operation ::= `vm.cmp.nz.ref` $operand attr-dict `:` type($operand)\n</code></pre> <p>Compares the given ref operand for a non-zero/null value.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code>, <code>OpAsmOpInterface</code>, <code>VMSerializableOp</code>, <code>VM_OpInterface</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VM/#operands_268","title":"Operands:","text":"Operand Description <code>operand</code> ref"},{"location":"reference/mlir-dialects/VM/#results_242","title":"Results:","text":"Result Description <code>result</code> 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/VM/#vmexport-vmexportop","title":"<code>vm.export</code> (VM::ExportOp)","text":"<p>Exports a function from the module</p> <p>Specifies an exported function with an externally-visible alias. 
Multiple exports can reference the same internal functions.</p> <p>Interfaces: <code>SymbolUserOpInterface</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_ref</code>::mlir::FlatSymbolRefAttrflat symbol reference attribute <code>export_name</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmfunc-vmfuncop","title":"<code>vm.func</code> (VM::FuncOp)","text":"<p>Function defined with VM control flow ops</p> <p>Represents a function containing VM ops and those of compatible dialects. All flow control is performed by VM ops.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>ordinal</code>::mlir::IntegerAttrordinal value <code>inlining_policy</code>::mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterfaceInliningPolicyAttrInterface instance <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmimport-vmimportop","title":"<code>vm.import</code> (VM::ImportOp)","text":"<p>Imports a function from an external module</p> <p>Specifies a function that should be imported from either the runtime or an external VM module.</p> <p>Required imports can be declared with a minimum version of the module that contains the import. 
The maximum declared minimum version of all required imports from the module will become the required minimum version at runtime.</p> <p>Optional imports not present at runtime will be invalid to call and whether they were resolved can be queried with <code>vm.import.resolved</code>.</p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_name</code>::mlir::StringAttrstring attribute <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>ordinal</code>::mlir::IntegerAttrordinal value <code>is_optional</code>::mlir::UnitAttrunit attribute <code>minimum_version</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vminitializer-vminitializerop","title":"<code>vm.initializer</code> (VM::InitializerOp)","text":"<p>Global initialization function</p> <p>A function that is called in definition order upon module initialization. 
Must not load any globals that are defined or initialized after it in the module.</p> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>IsolatedFromAbove</code></p> <p>Interfaces: <code>CallableOpInterface</code>, <code>FunctionOpInterface</code>, <code>Symbol</code>, <code>Util_InitializerOpInterface</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>function_type</code>::mlir::TypeAttrtype attribute of function type <code>arg_attrs</code>::mlir::ArrayAttrArray of dictionary attributes <code>res_attrs</code>::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmmodule-vmmoduleop","title":"<code>vm.module</code> (VM::ModuleOp)","text":"<p>Module containing VM functions and variables</p> <p>Syntax:</p> <pre><code>operation ::= `vm.module` custom&lt;SymbolVisibility&gt;($sym_visibility)\n              $sym_name\n              attr-dict-with-keyword\n              regions\n</code></pre> <p>Top-level container for VM functions.</p> <p>Traits: <code>IsolatedFromAbove</code>, <code>SingleBlockImplicitTerminator&lt;IREE::VM::ModuleTerminatorOp&gt;</code>, <code>SingleBlock</code>, <code>SymbolTable</code></p> <p>Interfaces: <code>Symbol</code>, <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>sym_visibility</code>::mlir::StringAttrstring attribute <code>sym_name</code>::mlir::StringAttrstring attribute <code>ordinal_counts</code>::mlir::iree_compiler::IREE::VM::OrdinalCountsAttr <code>version</code>::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vmmodule_terminator-vmmoduleterminatorop","title":"<code>vm.module_terminator</code> (VM::ModuleTerminatorOp)","text":"<p>Terminator pseudo-op for the module op</p> <p>Syntax:</p> <pre><code>operation ::= 
`vm.module_terminator` attr-dict\n</code></pre> <p>Traits: <code>HasParent&lt;IREE::VM::ModuleOp&gt;</code>, <code>Terminator</code></p> <p>Interfaces: <code>VM_OpInterface</code></p>"},{"location":"reference/mlir-dialects/VM/#attributes_46","title":"Attributes","text":""},{"location":"reference/mlir-dialects/VM/#ordinalcountsattr","title":"OrdinalCountsAttr","text":"<p>Syntax:</p> <pre><code>#vm.ordinal_counts&lt;\n  int32_t,   # import_funcs\n  int32_t,   # export_funcs\n  int32_t,   # internal_funcs\n  int32_t,   # global_bytes\n  int32_t,   # global_refs\n  int32_t,   # rodatas\n  int32_t   # rwdatas\n&gt;\n</code></pre>"},{"location":"reference/mlir-dialects/VM/#parameters","title":"Parameters:","text":"Parameter C++ type Description import_funcs <code>int32_t</code> export_funcs <code>int32_t</code> internal_funcs <code>int32_t</code> global_bytes <code>int32_t</code> global_refs <code>int32_t</code> rodatas <code>int32_t</code> rwdatas <code>int32_t</code>"},{"location":"reference/mlir-dialects/VMVX/","title":"VMVX","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvx-dialect","title":"'vmvx' Dialect","text":"<p>Vector extensions to the IREE VM.</p> <p>This is a reference dialect representing a simple IREE VM-based linear algebra module that is used as a library at runtime. 
The ops in this dialect map (roughly) 1:1 with the exported functions in the runtime module.</p> <p>See <code>vmvx.imports.mlir</code> for the full list of exported functions.</p> <ul> <li>'vmvx' Dialect<ul> <li>Operations<ul> <li>ABI ops<ul> <li>vmvx.binary (VMVX::BinaryOp)</li> <li>vmvx.copy (VMVX::CopyOp)</li> <li>vmvx.fill2d (VMVX::Fill2DOp)</li> <li>vmvx.unary (VMVX::UnaryOp)</li> </ul> </li> <li>Utility ops<ul> <li>vmvx.get_buffer_descriptor (VMVX::GetBufferDescriptorOp)</li> <li>vmvx.get_raw_interface_binding_buffer (VMVX::GetRawInterfaceBindingBufferOp)</li> </ul> </li> </ul> </li> </ul> </li> </ul>"},{"location":"reference/mlir-dialects/VMVX/#operations","title":"Operations","text":""},{"location":"reference/mlir-dialects/VMVX/#abi-ops","title":"ABI ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxbinary-vmvxbinaryop","title":"<code>vmvx.binary</code> (VMVX::BinaryOp)","text":"<p>Performs a strided elementwise operation on two same-rank buffers</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.binary` `op` `` `(` $opcode `:` $element_type `)`\n              `lhs` `` `(` $lhs_buffer `offset` $lhs_offset `strides` `[` $lhs_strides `]` `:` type($lhs_buffer) `)`\n              `rhs` `` `(` $rhs_buffer `offset` $rhs_offset `strides` `[` $rhs_strides `]` `:` type($rhs_buffer) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n              `sizes` `` `(` $sizes `)`\n              attr-dict\n</code></pre> <p>Performs the operation in-place as if: <pre><code>  OUT = OP(LHS, RHS)\n</code></pre></p> <p>Where <code>OP</code> is a concrete operation name as defined in ukernel/elementwise.h</p> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>opcode</code>::mlir::StringAttrstring attribute <code>element_type</code>::mlir::TypeAttrtype attribute of 8-bit 
signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands","title":"Operands:","text":"Operand Description <code>lhs_buffer</code> a reference counted byte buffer <code>lhs_offset</code> index <code>lhs_strides</code> variadic of index <code>rhs_buffer</code> a reference counted byte buffer <code>rhs_offset</code> index <code>rhs_strides</code> variadic of index <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_strides</code> variadic of index <code>sizes</code> variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxcopy-vmvxcopyop","title":"<code>vmvx.copy</code> (VMVX::CopyOp)","text":"<p>Copy from one buffer to another</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.copy` `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n              `sizes` `` `(` $sizes `)`\n              `:` $element_type\n              attr-dict\n</code></pre> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>element_type</code>::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_1","title":"Operands:","text":"Operand Description <code>in_buffer</code> a reference counted byte buffer <code>in_offset</code> index <code>in_strides</code> variadic of index <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_strides</code> variadic of index <code>sizes</code> variadic of 
index"},{"location":"reference/mlir-dialects/VMVX/#vmvxfill2d-vmvxfill2dop","title":"<code>vmvx.fill2d</code> (VMVX::Fill2DOp)","text":"<p>Fill a tile with a scalar</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.fill2d` `scalar` `` `(` $scalar `:` type($scalar) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `row_stride` $out_row_stride `:` type($out_buffer) `)`\n              `sizes` `` `(` $m `,` $n `)`\n              attr-dict\n</code></pre> <p>Fills a tile with dimensions [m, n] with a scalar.</p>"},{"location":"reference/mlir-dialects/VMVX/#operands_2","title":"Operands:","text":"Operand Description <code>scalar</code> 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_row_stride</code> index <code>m</code> index <code>n</code> index"},{"location":"reference/mlir-dialects/VMVX/#vmvxunary-vmvxunaryop","title":"<code>vmvx.unary</code> (VMVX::UnaryOp)","text":"<p>Performs a strided elementwise unary operation</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.unary` `op` `` `(` $opcode `:` $element_type `)`\n              `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n              `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n              `sizes` `` `(` $sizes `)`\n              attr-dict\n</code></pre> <p>Performs the operation in-place as if: <pre><code>  OUT = OP(IN)\n</code></pre></p> <p>Where <code>OP</code> is a concrete operation name as defined in ukernel/elementwise.h</p> <p>Traits: <code>SameVariadicOperandSize</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>opcode</code>::mlir::StringAttrstring attribute <code>element_type</code>::mlir::TypeAttrtype attribute 
of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_3","title":"Operands:","text":"Operand Description <code>in_buffer</code> a reference counted byte buffer <code>in_offset</code> index <code>in_strides</code> variadic of index <code>out_buffer</code> a reference counted byte buffer <code>out_offset</code> index <code>out_strides</code> variadic of index <code>sizes</code> variadic of index"},{"location":"reference/mlir-dialects/VMVX/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxget_buffer_descriptor-vmvxgetbufferdescriptorop","title":"<code>vmvx.get_buffer_descriptor</code> (VMVX::GetBufferDescriptorOp)","text":"<p>Late binds a base buffer/offset/strides</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.get_buffer_descriptor` $source `:` type($source) `-&gt;` type(results) attr-dict\n</code></pre> <p>Queries a base buffer, offset and strides. This op is late bound to its source (alloca, binding, etc), allowing additional layers of transformations to be added as lowering progresses (or for buffers to be combined).</p> <p>This op has canonicalization rules which will bubble it up through the view stack. 
A final reconciliation pass is used explicitly to bind it to concrete sources.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code>, <code>SameVariadicResultSize</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VMVX/#operands_4","title":"Operands:","text":"Operand Description <code>source</code> memref of any type values"},{"location":"reference/mlir-dialects/VMVX/#results","title":"Results:","text":"Result Description <code>base_buffer</code> a reference counted byte buffer <code>offset</code> index <code>sizes</code> variadic of index <code>strides</code> variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxget_raw_interface_binding_buffer-vmvxgetrawinterfacebindingbufferop","title":"<code>vmvx.get_raw_interface_binding_buffer</code> (VMVX::GetRawInterfaceBindingBufferOp)","text":"<p>Gets the raw buffer associated with a binding</p> <p>Syntax:</p> <pre><code>operation ::= `vmvx.get_raw_interface_binding_buffer` `set` `(` $set `)` `binding` `(` $binding `)` attr-dict\n</code></pre> <p>Normally, a slice of a binding buffer is returned via hal.interface.binding.subspan. 
However, the normal VMVX lowering flow for this presumes that the result is a memref, and upon final conversion, it will offset the memref automatically to make it consistent.</p> <p>This op is used in situations where earlier in a lowering, we have fully resolved the binding to a buffer and would just like the raw backing buffer as passed to the interface.</p> <p>Traits: <code>AlwaysSpeculatableImplTrait</code></p> <p>Interfaces: <code>ConditionallySpeculatable</code>, <code>NoMemoryEffect (MemoryEffectOpInterface)</code></p> <p>Effects: <code>MemoryEffects::Effect{}</code></p>"},{"location":"reference/mlir-dialects/VMVX/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription <code>set</code>::mlir::IntegerAttrindex attribute <code>binding</code>::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/VMVX/#results_1","title":"Results:","text":"Result Description <code>buffer</code> a reference counted byte buffer"},{"location":"community/blog/archive/2024/","title":"2024","text":""},{"location":"community/blog/archive/2021/","title":"2021","text":""},{"location":"community/blog/category/performance/","title":"Performance","text":""},{"location":"community/blog/category/platforms/","title":"Platforms","text":""},{"location":"community/blog/category/frontends/","title":"Frontends","text":""},{"location":"community/tags/","title":"Tags","text":"<p>Website pages sorted by tag:</p>"},{"location":"community/tags/#android","title":"Android","text":"<ul> <li>Android cross-compilation</li> <li>Android LLDB debugging</li> </ul>"},{"location":"community/tags/#cpu","title":"CPU","text":"<ul> <li>RISC-V cross-compilation</li> <li>IREE / MLIR / Linalg tutorial</li> <li>Exploring CPU microkernels on a matmul example</li> <li>Matrix Multiplication with MMT4D</li> <li>Profiling CPUs</li> <li>CPU - Bare-Metal</li> <li>CPU</li> </ul>"},{"location":"community/tags/#cuda","title":"CUDA","text":"<ul> <li>CUDA backend</li> <li>GPU debugging 
playbook</li> <li>CUDA HAL driver</li> <li>GPU - CUDA</li> </ul>"},{"location":"community/tags/#gpu","title":"GPU","text":"<ul> <li>CUDA backend</li> <li>Vulkan environment setup</li> <li>GPU debugging playbook</li> <li>CUDA HAL driver</li> <li>HIP HAL driver</li> <li>Metal HAL driver</li> <li>Profiling GPUs using Vulkan</li> <li>GPU - CUDA</li> <li>GPU - Metal</li> <li>GPU - ROCm</li> <li>GPU - Vulkan</li> </ul>"},{"location":"community/tags/#hip","title":"HIP","text":"<ul> <li>HIP HAL driver</li> </ul>"},{"location":"community/tags/#jax","title":"JAX","text":"<ul> <li>JAX</li> <li>Extensions</li> <li>Glossary</li> </ul>"},{"location":"community/tags/#metal","title":"Metal","text":"<ul> <li>GPU debugging playbook</li> <li>Metal HAL driver</li> </ul>"},{"location":"community/tags/#onnx","title":"ONNX","text":"<ul> <li>ONNX</li> </ul>"},{"location":"community/tags/#pytorch","title":"PyTorch","text":"<ul> <li>ONNX</li> <li>PyTorch</li> <li>Extensions</li> <li>Glossary</li> </ul>"},{"location":"community/tags/#python","title":"Python","text":"<ul> <li>JAX</li> <li>ONNX</li> <li>PyTorch</li> <li>TensorFlow</li> <li>TensorFlow Lite</li> <li>Python</li> </ul>"},{"location":"community/tags/#rocm","title":"ROCm","text":"<ul> <li>GPU debugging playbook</li> </ul>"},{"location":"community/tags/#tensorflow","title":"TensorFlow","text":"<ul> <li>TFLite support via TOSA</li> <li>TensorFlow</li> <li>TensorFlow Lite</li> <li>Extensions</li> <li>Glossary</li> </ul>"},{"location":"community/tags/#vulkan","title":"Vulkan","text":"<ul> <li>Vulkan environment setup</li> <li>GPU debugging playbook</li> <li>Profiling GPUs using Vulkan</li> <li>GPU - Vulkan</li> </ul>"},{"location":"community/tags/#web","title":"Web","text":"<ul> <li>Building with Emscripten</li> </ul>"},{"location":"community/tags/#ios","title":"iOS","text":"<ul> <li>iOS cross-compilation</li> <li>GPU - Metal</li> </ul>"}]}
\ No newline at end of file
diff --git a/sitemap.xml b/sitemap.xml
index f1cd6e08f2d1..8daf49fb01f8 100755
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,427 +2,427 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
     <url>
          <loc>https://iree.dev/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/building-from-source/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/building-from-source/android/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/building-from-source/getting-started/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/building-from-source/ios/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/building-from-source/riscv/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/2021-10-15-cuda-backend/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/2024-01-29-iree-mlir-linalg-tutorial/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/2024-01-22-exploring-cpu-microkernels-on-a-matmul-example/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/2021-10-13-matrix-multiplication-with-mmt4d/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/2021-07-19-tflite-support-via-tosa/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/usage-best-practices/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/vulkan-environment-setup/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/building/bazel/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/building/cmake-options/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/building/cmake-with-ccache/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/building/emscripten/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/debugging/android-with-lldb/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/debugging/compile-time-regressions/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/debugging/gpu/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/debugging/integration-tests/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/debugging/releases/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/debugging/sanitizers/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/design-docs/cuda-hal-driver/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/design-docs/design-roadmap/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/design-docs/function-abi/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/design-docs/hip-hal-driver/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/design-docs/invocation-execution-model/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/design-docs/metal-hal-driver/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/general/contributing/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/general/developer-overview/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/general/developer-tips/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/general/release-management/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/general/testing-guide/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/performance/benchmark-suites/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/performance/benchmarking/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/performance/profiling-cpu-events/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/performance/profiling-gpu-vulkan/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/performance/profiling-with-tracy/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/developers/performance/profiling/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/parameters/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/deployment-configurations/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/deployment-configurations/bare-metal/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/deployment-configurations/cpu/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/deployment-configurations/gpu-cuda/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/deployment-configurations/gpu-metal/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/deployment-configurations/gpu-rocm/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/deployment-configurations/gpu-vulkan/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/ml-frameworks/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/ml-frameworks/jax/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/ml-frameworks/onnx/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/ml-frameworks/pytorch/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/ml-frameworks/tensorflow/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/guides/ml-frameworks/tflite/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/extensions/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/glossary/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/optimization-options/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/bindings/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/bindings/c-api/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/bindings/python/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/Check/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/Flow/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/HAL/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/HALInline/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/HALLoader/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/IOParameters/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/IREEInput/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/IREEVectorExt/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/LinalgExt/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/Stream/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/Util/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/VM/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/reference/mlir-dialects/VMVX/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/archive/2024/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/archive/2021/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/category/performance/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/category/platforms/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/blog/category/frontends/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://iree.dev/community/tags/</loc>
-         <lastmod>2024-05-01</lastmod>
+         <lastmod>2024-05-09</lastmod>
          <changefreq>daily</changefreq>
     </url>
 </urlset>
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 0e4c912e7c97980252acf9e65e334d7d1ffde9a6..50c655c2f86ea2cfa09aa52d28e9d1485e12debd 100755
GIT binary patch
literal 1018
zcmV<W0|opaiwFpCbUbDP|8r?{Wo=<_E_iKh0M*({Z{s!)0O0%nioo}Y>~t4xio}be
z$f5yKwCHw&UPshOo|>=n;m7{^lCpQ>Lxc8I3=Jev2txY8NaV~=ckd5IU+pMFcJ_X;
zx?V1>$SOCmo$eQ(e*Xa9E#9x++~uGnrK{4}#{0!4CI5Zb?f3iZ$cYU4c<sW}MYSov
z#O~bKEw67E>o-^BQ%`U@nm>2isdYU|ET`c>bUo0nyX$K6WiO1$xM6*>ym<@Dcd)!I
zUCwP6-P8u{L}Q@8)&sIW-gOr(r^|O=um7JUeP&&U0LI`9M3;ji={&SUa6Cw7PLxt@
ziwQ#_@mzLMIx%omk{%xAjLDYMQKYHYZjuWt*I&VE39EOY@_^8z+L9eagUj_-aB~Z_
z3mDA-G+WdYq?}w})D7rwfDT?4ZF#no&tO;ux20f7M{pJp3?`Fx>4wH@YTyp~rji36
zps*8r%?eXl<YCWg1IDDcgYZ9M`7Nw&A&t636A-iaE+p77LUIuW*~=Gc=SvJF6O|)Q
z1U<zBKA=jhNT_s|^%gCV-EnZ%kWEmc&g^AszbwXGkDo~kH!6c$0_W=#5Sy^h^ORCp
zath&IGEoKgiL6ZMKz*L3Dm~{2r?J&~=%s_py5Lz8BsK(rra&>uy@Oxelh4wLY*QJ}
zQ%sP`;vjxZ18KrY;++voAj6-<#sdRIo-7Pb#cobAHmG3;yrUo=d_E2ihgxA0?;W#M
z^CN~m%hzt$H+46#-MMpGLkCiMJ}MZg;n=0!P%0xWeY7XC6iN!OoGr;@pKI+-w)ATg
zxH76crhO~klBGx4*7Fh!25reHYl0@*TjQEq(wQ<3LbHhSZFHdwsG_bXt2PG17GmZk
zHPZP<vDj(hQtN{oS%0bVl|DAbmeeJyfK!d6CH0(arhw|$kXjIRp1h(wkjH=}h>?}&
zoNWVn?T&K=u|zH-Px+MTHbw0*l%=+Yi7ly>sM5#3a_gL0v*6au&D|Fj9h@?)nZ+%*
zXSLBV)(Lp;!Zu3h{fP(ZL}#sdqdz7W)J8fpk&P~l+U><l=iOyN%0MFpvP!0j4)tFv
zERK5NP1?nX;V2SU_g)QuK2<B_ZN$Dnd$x63Q#eimdX`Nrv-QSVk;eHot$|UKO0faq
zj~i0k2Am&ct$D98fBNC64dtWNY^e?FlfyyOHqL9-$6tPxSh{(Y``g3AqxCs8Vtp=0
zE<7As@IK9N>b7D1m;#|`0QxDhZh-mxa}&ttKjglrj3pW89nJ5NH=$g~RVqw0xstqH
o?#DEs)tD+bHeeltvn7EaWLgvcA35jzBT`!b4Lj0G#rrq_0Igg100000

literal 1018
zcmV<W0|opaiwFpC=rLvj|8r?{Wo=<_E_iKh0M*({Z{s!)0O0%niokb9cDjppi^Pkf
z$f5yKwCHw&UPshOo|>-?Nk8)IOUmAj4-MK=F*J}wAqeRUBat&h-F`S&bG1W`<lWt3
zb-i3%q0@dKH{C5h|Nc>ZxA?GrbDP;jOjo6|i+77nO5yvi+wb?+krG-6@!Ip$MZGD%
z#O~bKEwA4%)^D!Lr=H++G=FZ5*Xw$gSWZKXcs<~*yX|W8WiN=y`eA*uym_mZ@6>Wt
zx}4iCy6FwL2}j1i)&r0^-gXx)r^|O=um7JUeI`?fK#lCJiaxVO={#@)dm5xOCrqif
z#RQyCJePcwPGpK&(!-;@wb@ZRiZu1cPjX@9`YW|ss?|HC^8l&`y+t>O2AAuv)XjTU
zyC`djl_3XaQYk0T1f~J~jlx4PMO&UN9kQ~d*>8~@ny9=}pe!VtP3Z<U7;NAU`lgbZ
z4yv#d1w$HAS>)B8(neXE-VVb5h~>9x^<JgXlxRZ59D?Vhb_6Q<2!ib83ylvYhLVZO
z5hheU#-sv+PNY$&beGK*oI<xF_RgY9szjYR$kcvWjJqD5(Fiwc3tLn^)F~h~VV&nG
zt)b);!o6stW(o<NOz41po~9~2=Ln~<HD>6ggUh-Q$e>DOQ87-8G0MGzU))p3(uw3!
z8P8Kpkjmm9eoPq+LPYV-2qloAXR-0XfRQGr2Cri`Cm9<sYGB$S%Lkv2*~3sPOya#`
zc6xrqpeOm-4TYxe268)pPHXUhI?qQ1BQ+emv|CJNq@|Dcgbul+@Y=hQO!m3f?qo~9
zHYs05bw{{w#apuUDBF5of|UhVa>|;Z$@bQ`rj~T3OoPxYqI?@YmjTt-_2~4*0&i8!
zl%z&F|0otaEnI2?`;p9-8ei$-Qfx_GvI=#ok+h_qlg-4Sj}55>QRm4k$^!*pC_#*@
zJm+j1Fc^QFD~Kg>8EML=Ot&d&4_ubo8YZ@+)}l%u|H`d%YR!UMGdFi%R5W{STQiGW
zaL;OM)L19rz2|L|&ifM%(uv+V@y2jWp7lmLGog!~N8|V6rStBxASL67j84lm;i3L(
zg~d@Xyvg_&fsZ0_b??>E^Ql@XZzJ{v+LNo>n!<5fK_J=0GFxxF6KR}Z(;5g2>J%Fg
z{<uNCZNT|a)|&Sk^QRvk+fY6@Lk`=pK6)5LZR5OVefZ^9iKUxYxxd}tKe&)nBi5&K
z<oW*4g7<NLQ@0K4r^E=h0qEyMrUB;D&rKko{*e2gGL&STb~wLB-h^@`SE(W4<V*5)
oxgXPj)<CM<*no9p?@9tc$h0Q-KXT6bN5r)L8wg2z$on_|0RF}V#sB~S


Branch type	Naming scheme	Example
Single user	`users/[username]/*`	`users/cooldeveloper/my-awesome-feature`
Shared feature branch	`shared/*`	`shared/pytorch-performance-sprint`
Dependency updates	`integrates/*`	`integrates/integrate-llvm-20240501`