diff --git a/reference/bindings/c-api/index.html b/reference/bindings/c-api/index.html index e087aff3e726..670cbccb587d 100755 --- a/reference/bindings/c-api/index.html +++ b/reference/bindings/c-api/index.html @@ -1366,20 +1366,6 @@
This snippet shows the general layout of the API. For working examples, see the samples below.
To build a custom tool using the compiler API:
-set(_IREE_COMPILER_API "${_IREE_COMPILER_ROOT}/bindings/c/iree/compiler")
+
+
+CMakeLists.txt 1
+2
+3
set(_IREE_COMPILER_API "${_IREE_COMPILER_ROOT}/bindings/c/iree/compiler")
target_include_directories(${_NAME} SYSTEM PRIVATE ${_IREE_COMPILER_API})
target_link_libraries(${_NAME} iree_compiler_bindings_c_loader)
-
-iree_compiler_demo.c#include <iree/compiler/embedding_api.h>
+
+iree_compiler_demo.c 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
#include <iree/compiler/embedding_api.h>
#include <iree/compiler/loader.h>
int main(int argc, char** argv) {
@@ -3862,73 +3869,7 @@ Compiler session API ireeCompilerSessionDestroy(session);
ireeCompilerGlobalShutdown();
}
-
-Compiler pluginslink
-
-This snippet comes from the
-example compiler plugin.
-For other examples, see the samples below.
-
-To add a compiler plugin that extends the compiler with custom options:
-samples/compiler_plugins/example/CMakeLists.txtiree_cc_library(
- NAME
- registration
- SRCS
- "src/PluginRegistration.cpp"
- DEPS
- ::defs
- MLIRIR
- iree::compiler::PluginAPI
- PUBLIC
-)
-
-iree_compiler_register_plugin(
- PLUGIN_ID
- example
- TARGET
- ::registration
-)
-
-samples/compiler_plugins/example/src/PluginRegistration.cpp#include "iree/compiler/PluginAPI/Client.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/MLIRContext.h"
-
-using namespace mlir;
-using namespace mlir::iree_compiler;
-
-namespace {
-
-struct MyOptions {
- bool flag = false;
-
- void bindOptions(OptionsBinder &binder) {
- static llvm::cl::OptionCategory category("IREE Example Plugin");
- binder.opt<bool>("iree-example-flag", flag,
- llvm::cl::desc("Dummy flag for the example plugin"),
- llvm::cl::cat(category));
- }
-};
-
-struct MySession : public PluginSession<MySession, MyOptions> {
- LogicalResult onActivate() override {
- mlir::emitRemark(mlir::UnknownLoc::get(context))
- << "This remark is from the example plugin activation (flag="
- << options.flag << ")";
- return success();
- }
-};
-
-} // namespace
-
-IREE_DEFINE_COMPILER_OPTION_FLAGS(MyOptions);
-
-extern "C" bool iree_register_compiler_plugin_example(
- mlir::iree_compiler::PluginRegistrar *registrar) {
- registrar->registerPlugin<MySession>("example");
- return true;
-}
-
+
Sampleslink
@@ -4218,57 +4159,814 @@ HALlink
Usagelink
-This snippet shows the general layout of the API. For working examples, see
-the samples below.
+For other examples, see the samples below.
-CMakeLists.txttarget_include_directories(${_NAME} SYSTEM PRIVATE ${_IREE_RUNTIME_ROOT})
-target_link_libraries(${_NAME} iree_runtime_runtime)
-
-iree_runtime_demo.c#include <iree/runtime/api.h>
+
+
+
+Source file: runtime/src/iree/runtime/demo/hello_world_terse.c
+runtime/src/iree/runtime/demo/hello_world_terse.c 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
#include <stdio.h>
+
+#include "iree/runtime/api.h"
+#include "iree/runtime/testdata/simple_mul_module_c.h"
+
+static void iree_runtime_demo_run_session(iree_runtime_instance_t* instance);
+static void iree_runtime_demo_perform_mul(iree_runtime_session_t* session);
+
+//===----------------------------------------------------------------------===//
+// 1. Entry point / shared iree_runtime_instance_t setup
+//===----------------------------------------------------------------------===//
int main(int argc, char** argv) {
- // Setup the shared runtime instance.
+ // Create and configure the instance shared across all sessions.
iree_runtime_instance_options_t instance_options;
iree_runtime_instance_options_initialize(&instance_options);
iree_runtime_instance_options_use_all_available_drivers(&instance_options);
iree_runtime_instance_t* instance = NULL;
- iree_runtime_instance_create(
+ IREE_CHECK_OK(iree_runtime_instance_create(
+ &instance_options, iree_allocator_system(), &instance));
+
+ // All sessions should share the same instance.
+ iree_runtime_demo_run_session(instance);
+
+ iree_runtime_instance_release(instance);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// 2. Load modules and initialize state in iree_runtime_session_t
+//===----------------------------------------------------------------------===//
+
+static void iree_runtime_demo_run_session(iree_runtime_instance_t* instance) {
+ // TODO(#5724): move device selection into the compiled modules.
+ iree_hal_device_t* device = NULL;
+ IREE_CHECK_OK(iree_runtime_instance_try_create_default_device(
+ instance, iree_make_cstring_view("local-task"), &device));
+
+ // Create one session per loaded module to hold the module state.
+ iree_runtime_session_options_t session_options;
+ iree_runtime_session_options_initialize(&session_options);
+ iree_runtime_session_t* session = NULL;
+ IREE_CHECK_OK(iree_runtime_session_create_with_device(
+ instance, &session_options, device,
+ iree_runtime_instance_host_allocator(instance), &session));
+ iree_hal_device_release(device);
+
+ // Load your user module into the session (from memory, from file, etc).
+ const iree_file_toc_t* module_file =
+ iree_runtime_testdata_simple_mul_module_create();
+ IREE_CHECK_OK(iree_runtime_session_append_bytecode_module_from_memory(
+ session, iree_make_const_byte_span(module_file->data, module_file->size),
+ iree_allocator_null()));
+
+ // Run your functions; you should reuse the session to make multiple calls.
+ iree_runtime_demo_perform_mul(session);
+
+ iree_runtime_session_release(session);
+}
+
+//===----------------------------------------------------------------------===//
+// 3. Call a function within a module with buffer views
+//===----------------------------------------------------------------------===//
+
+// func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) ->
+// tensor<4xf32>
+static void iree_runtime_demo_perform_mul(iree_runtime_session_t* session) {
+ iree_runtime_call_t call;
+ IREE_CHECK_OK(iree_runtime_call_initialize_by_name(
+ session, iree_make_cstring_view("module.simple_mul"), &call));
+
+ // %arg0: tensor<4xf32>
+ iree_hal_buffer_view_t* arg0 = NULL;
+ static const iree_hal_dim_t arg0_shape[1] = {4};
+ static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+ IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(
+ iree_runtime_session_device(session),
+ iree_runtime_session_device_allocator(session),
+ IREE_ARRAYSIZE(arg0_shape), arg0_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ .access = IREE_HAL_MEMORY_ACCESS_ALL,
+ .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,
+ },
+ iree_make_const_byte_span(arg0_data, sizeof(arg0_data)), &arg0));
+ IREE_CHECK_OK(iree_hal_buffer_view_fprint(
+ stdout, arg0, /*max_element_count=*/4096,
+ iree_runtime_session_host_allocator(session)));
+ IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&call, arg0));
+ iree_hal_buffer_view_release(arg0);
+
+ fprintf(stdout, "\n * \n");
+
+ // %arg1: tensor<4xf32>
+ iree_hal_buffer_view_t* arg1 = NULL;
+ static const iree_hal_dim_t arg1_shape[1] = {4};
+ static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+ IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(
+ iree_runtime_session_device(session),
+ iree_runtime_session_device_allocator(session),
+ IREE_ARRAYSIZE(arg1_shape), arg1_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ .access = IREE_HAL_MEMORY_ACCESS_ALL,
+ .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,
+ },
+ iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1));
+ IREE_CHECK_OK(iree_hal_buffer_view_fprint(
+ stdout, arg1, /*max_element_count=*/4096,
+ iree_runtime_session_host_allocator(session)));
+ IREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&call, arg1));
+ iree_hal_buffer_view_release(arg1);
+
+ IREE_CHECK_OK(iree_runtime_call_invoke(&call, /*flags=*/0));
+
+ fprintf(stdout, "\n = \n");
+
+ // -> tensor<4xf32>
+ iree_hal_buffer_view_t* ret0 = NULL;
+ IREE_CHECK_OK(iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret0));
+ IREE_CHECK_OK(iree_hal_buffer_view_fprint(
+ stdout, ret0, /*max_element_count=*/4096,
+ iree_runtime_session_host_allocator(session)));
+ iree_hal_buffer_view_release(ret0);
+
+ iree_runtime_call_deinitialize(&call);
+}
+
+
+
+Source file: runtime/src/iree/runtime/demo/hello_world_explained.c
+runtime/src/iree/runtime/demo/hello_world_explained.c 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
#include <stdio.h>
+
+#include "iree/runtime/api.h"
+
+static int iree_runtime_demo_main(void);
+static iree_status_t iree_runtime_demo_run_session(
+ iree_runtime_instance_t* instance);
+static iree_status_t iree_runtime_demo_perform_mul(
+ iree_runtime_session_t* session);
+
+#if defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_COMMAND_LINE_ARG)
+
+static const char* demo_file_path = NULL;
+
+// Takes the first argument on the command line as a file path and loads it.
+int main(int argc, char** argv) {
+ if (argc < 2) {
+ fprintf(stderr, "usage: session_demo module_file.vmfb\n");
+ return 1;
+ }
+ demo_file_path = argv[1];
+ return iree_runtime_demo_main();
+}
+
+// Loads a compiled IREE module from the file system.
+static iree_status_t iree_runtime_demo_load_module(
+ iree_runtime_session_t* session) {
+ return iree_runtime_session_append_bytecode_module_from_file(session,
+ demo_file_path);
+}
+
+#elif defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_EMBEDDED_DATA)
+
+#include "iree/runtime/testdata/simple_mul_module_c.h"
+
+int main(int argc, char** argv) { return iree_runtime_demo_main(); }
+
+// Loads the bytecode module directly from memory.
+//
+// Embedding the compiled output into your binary is not always possible (or
+// recommended) but is a fairly painless way to get things working on a variety
+// of targets without worrying about how to deploy files or pass flags.
+//
+// In cases like this the module file is in .rodata and does not need to be
+// freed; if the memory needs to be released when the module is unloaded then a
+// custom allocator can be provided to get a callback instead.
+static iree_status_t iree_runtime_demo_load_module(
+ iree_runtime_session_t* session) {
+ const iree_file_toc_t* module_file =
+ iree_runtime_testdata_simple_mul_module_create();
+ return iree_runtime_session_append_bytecode_module_from_memory(
+ session, iree_make_const_byte_span(module_file->data, module_file->size),
+ iree_allocator_null());
+}
+
+#else
+#error "must specify a way to load the module data"
+#endif // IREE_RUNTIME_DEMO_LOAD_FILE_FROM_*
+
+//===----------------------------------------------------------------------===//
+// 1. Entry point / shared iree_runtime_instance_t setup
+//===----------------------------------------------------------------------===//
+// Applications should create and share a single instance across all sessions.
+
+// This would live in your application startup/shutdown code or scoped to the
+// usage of IREE. Creating and destroying instances is expensive and should be
+// avoided.
+static int iree_runtime_demo_main(void) {
+ // Set up the shared runtime instance.
+ // An application should usually only have one of these and share it across
+ // all of the sessions it has. The instance is thread-safe, while the
+ // sessions are only thread-compatible (you need to lock if it's required).
+ iree_runtime_instance_options_t instance_options;
+ iree_runtime_instance_options_initialize(&instance_options);
+ iree_runtime_instance_options_use_all_available_drivers(&instance_options);
+ iree_runtime_instance_t* instance = NULL;
+ iree_status_t status = iree_runtime_instance_create(
&instance_options, iree_allocator_system(), &instance);
- // Create the HAL device used to run the workloads.
+ // Run the demo.
+ // A real application would load its models (at startup, on-demand, etc) and
+ // retain them somewhere to be reused. Startup time and likelihood of failure
+ // varies across different HAL backends; the synchronous CPU backend is nearly
+ // instantaneous and will never fail (unless out of memory) while the Vulkan
+ // backend may take significantly longer and fail if there are not supported
+ // devices.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_demo_run_session(instance);
+ }
+
+ // Release the shared instance - it will be deallocated when all sessions
+ // using it have been released (here it is deallocated immediately).
+ iree_runtime_instance_release(instance);
+
+ int ret = (int)iree_status_code(status);
+ if (!iree_status_is_ok(status)) {
+ // Dump nice status messages to stderr on failure.
+ // An application can route these through its own logging infrastructure as
+ // needed. Note that the status is a handle and must be freed!
+ iree_status_fprint(stderr, status);
+ iree_status_ignore(status);
+ }
+ return ret;
+}
+
+//===----------------------------------------------------------------------===//
+// 2. Load modules and initialize state in iree_runtime_session_t
+//===----------------------------------------------------------------------===//
+// Each instantiation of a module will live in its own session. Module state
+// like variables will be retained across calls within the same session.
+
+// Loads the demo module and uses it to perform some math.
+// In a real application you'd want to hang on to the iree_runtime_session_t
+// and reuse it for future calls - especially if it holds state internally.
+static iree_status_t iree_runtime_demo_run_session(
+ iree_runtime_instance_t* instance) {
+ // TODO(#5724): move device selection into the compiled modules.
iree_hal_device_t* device = NULL;
- iree_runtime_instance_try_create_default_device(
- instance, iree_make_cstring_view("local-task"), &device);
+ IREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(
+ instance, iree_make_cstring_view("local-task"), &device));
- // Create a session to hold the module state.
+ // Set up the session to run the demo module.
+ // Sessions are like OS processes and are used to isolate modules from each
+ // other and hold runtime state such as the variables used within the module.
+ // The same module loaded into two sessions will see their own private state.
iree_runtime_session_options_t session_options;
iree_runtime_session_options_initialize(&session_options);
iree_runtime_session_t* session = NULL;
- iree_runtime_session_create_with_device(
+ iree_status_t status = iree_runtime_session_create_with_device(
instance, &session_options, device,
iree_runtime_instance_host_allocator(instance), &session);
+ iree_hal_device_release(device);
- // Load the compiled user module from a file.
- iree_runtime_session_append_bytecode_module_from_file(
- session, "program.vmfb");
+ // Load the compiled user module in a demo-specific way.
+ // Applications could specify files, embed the outputs directly in their
+ // binaries, fetch them over the network, etc.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_demo_load_module(session);
+ }
// Build and issue the call.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_demo_perform_mul(session);
+ }
+
+ // Release the session and free all resources.
+ iree_runtime_session_release(session);
+ return status;
+}
+
+//===----------------------------------------------------------------------===//
+// 3. Call a function within a module with buffer views
+//===----------------------------------------------------------------------===//
+// The inputs and outputs of a call are reusable across calls (and possibly
+// across sessions depending on device compatibility) and can be setup by the
+// application as needed. For example, an application could perform
+// multi-threaded buffer view creation and then issue the call from a single
+// thread when all inputs are ready. This simple demo just allocates them
+// per-call and throws them away.
+
+// Sets up and calls the simple_mul function and dumps the results:
+// func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) ->
+// tensor<4xf32>
+//
+// NOTE: this is a demo and as such this performs no memoization; a real
+// application could reuse a lot of these structures and cache lookups of
+// iree_vm_function_t to reduce the amount of per-call overhead.
+static iree_status_t iree_runtime_demo_perform_mul(
+ iree_runtime_session_t* session) {
+ // Initialize the call to the function.
iree_runtime_call_t call;
- iree_runtime_call_initialize_by_name(
- session, iree_make_cstring_view("module.entry_function_name"), &call);
- // iree_runtime_call_inputs_push_back_buffer_view(...);
- iree_runtime_call_invoke(&call, /*flags=*/0);
+ IREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(
+ session, iree_make_cstring_view("module.simple_mul"), &call));
+
+ // Append the function inputs with the HAL device allocator in use by the
+ // session. The buffers will be usable within the session and _may_ be usable
+ // in other sessions depending on whether they share a compatible device.
+ iree_hal_device_t* device = iree_runtime_session_device(session);
+ iree_hal_allocator_t* device_allocator =
+ iree_runtime_session_device_allocator(session);
+ iree_allocator_t host_allocator =
+ iree_runtime_session_host_allocator(session);
+ iree_status_t status = iree_ok_status();
+ {
+ // %arg0: tensor<4xf32>
+ iree_hal_buffer_view_t* arg0 = NULL;
+ if (iree_status_is_ok(status)) {
+ static const iree_hal_dim_t arg0_shape[1] = {4};
+ static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+ status = iree_hal_buffer_view_allocate_buffer_copy(
+ device, device_allocator,
+ // Shape rank and dimensions:
+ IREE_ARRAYSIZE(arg0_shape), arg0_shape,
+ // Element type:
+ IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ // Encoding type:
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ // Where to allocate (host or device):
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ // Access to allow to this memory:
+ .access = IREE_HAL_MEMORY_ACCESS_ALL,
+ // Intended usage of the buffer (transfers, dispatches, etc):
+ .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,
+ },
+ // The actual heap buffer to wrap or clone and its allocator:
+ iree_make_const_byte_span(arg0_data, sizeof(arg0_data)),
+ // Buffer view + storage are returned and owned by the caller:
+ &arg0);
+ }
+ if (iree_status_is_ok(status)) {
+ IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(
+ stdout, arg0, /*max_element_count=*/4096, host_allocator));
+ // Add to the call inputs list (which retains the buffer view).
+ status = iree_runtime_call_inputs_push_back_buffer_view(&call, arg0);
+ }
+ // Since the call retains the buffer view we can release it here.
+ iree_hal_buffer_view_release(arg0);
+
+ fprintf(stdout, "\n * \n");
+
+ // %arg1: tensor<4xf32>
+ iree_hal_buffer_view_t* arg1 = NULL;
+ if (iree_status_is_ok(status)) {
+ static const iree_hal_dim_t arg1_shape[1] = {4};
+ static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+ status = iree_hal_buffer_view_allocate_buffer_copy(
+ device, device_allocator, IREE_ARRAYSIZE(arg1_shape), arg1_shape,
+ IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+ IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+ (iree_hal_buffer_params_t){
+ .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
+ .access = IREE_HAL_MEMORY_ACCESS_ALL,
+ .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,
+ },
+ iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1);
+ }
+ if (iree_status_is_ok(status)) {
+ IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(
+ stdout, arg1, /*max_element_count=*/4096, host_allocator));
+ status = iree_runtime_call_inputs_push_back_buffer_view(&call, arg1);
+ }
+ iree_hal_buffer_view_release(arg1);
+ }
- // Retrieve the function outputs and clean up the call.
- // iree_runtime_call_outputs_pop_front_buffer_view(...);
- iree_runtime_call_deinitialize(&call);
+ // Synchronously perform the call.
+ if (iree_status_is_ok(status)) {
+ status = iree_runtime_call_invoke(&call, /*flags=*/0);
+ }
- // Cleanup state.
- iree_runtime_session_release(session);
- iree_hal_device_release(device);
- iree_runtime_instance_release(instance);
+ fprintf(stdout, "\n = \n");
+
+ // Dump the function outputs.
+ iree_hal_buffer_view_t* ret0 = NULL;
+ if (iree_status_is_ok(status)) {
+ // Try to get the first call result as a buffer view.
+ status = iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret0);
+ }
+ if (iree_status_is_ok(status)) {
+ // This prints the buffer view out but an application could read its
+ // contents, pass it to another call, etc.
+ status = iree_hal_buffer_view_fprint(
+ stdout, ret0, /*max_element_count=*/4096, host_allocator);
+ }
+ iree_hal_buffer_view_release(ret0);
+
+ iree_runtime_call_deinitialize(&call);
+ return status;
}
-
+
+
+
+
Sampleslink
diff --git a/search/search_index.json b/search/search_index.json
index 8c4b07eb6cce..bfa719c955e5 100755
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"IREE","text":"IREE (Intermediate Representation Execution Environment1) is an MLIR-based end-to-end compiler and runtime that lowers Machine Learning (ML) models to a unified IR that scales up to meet the needs of the datacenter and down to satisfy the constraints and special considerations of mobile and edge deployments.
"},{"location":"#key-features","title":"Key features","text":" - Ahead-of-time compilation of scheduling and execution logic together
- Support for dynamic shapes, flow control, streaming, and other advanced model features
- Optimized for many CPU and GPU architectures
- Low overhead, pipelined execution for efficient power and resource usage
- Binary size as low as 30KB on embedded systems
- Debugging and profiling support
"},{"location":"#support-matrix","title":"Support matrix","text":"IREE supports importing from a variety of ML frameworks:
- TensorFlow
- TensorFlow Lite
- JAX
- PyTorch
- ONNX (hoped for)
The IREE compiler tools run on Linux, Windows, and macOS and can generate efficient code for a variety of runtime platforms:
- Linux
- Windows
- Android
- macOS
- iOS
- Bare metal
- WebAssembly (planned)
and architectures:
- ARM
- x86
- RISC-V
Support for hardware accelerators and APIs is also included:
- Vulkan
- CUDA
- Metal (for Apple silicon devices)
- WebGPU (planned)
"},{"location":"#project-architecture","title":"Project architecture","text":"IREE adopts a holistic approach towards ML model compilation: the IR produced contains both the scheduling logic, required to communicate data dependencies to low-level parallel pipelined hardware/API like Vulkan, and the execution logic, encoding dense computation on the hardware in the form of hardware/API-specific binaries like SPIR-V.
"},{"location":"#workflow-overview","title":"Workflow overview","text":"Using IREE involves the following general steps:
-
Import your model
Develop your program using one of the supported frameworks, then import into IREE
-
Select your deployment configuration
Identify your target platform, accelerator(s), and other constraints
-
Compile your model
Compile through IREE, picking settings based on your deployment configuration
-
Run your model
Use IREE's runtime components to execute your compiled model
"},{"location":"#importing-models-from-ml-frameworks","title":"Importing models from ML frameworks","text":"IREE supports importing models from a growing list of ML frameworks and model formats:
- TensorFlow and TensorFlow Lite
- JAX
- PyTorch
"},{"location":"#selecting-deployment-configurations","title":"Selecting deployment configurations","text":"IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE for dynamic model deployments taking advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.
- What platforms are you targeting? Desktop? Mobile? An embedded system?
- What hardware should the bulk of your model run on? CPU? GPU?
- How fixed is your model itself? Can the weights be changed? Do you want to support loading different model architectures dynamically?
IREE supports the full set of these configurations using the same underlying technology.
"},{"location":"#compiling-models","title":"Compiling models","text":"Model compilation is performed ahead-of-time on a host machine for any combination of targets. The compilation process converts from layers and operators used by high level frameworks down into optimized native code and associated scheduling logic.
For example, compiling for GPU execution using Vulkan generates SPIR-V kernels and Vulkan API calls. For CPU execution, native code with static or dynamic linkage and the associated function calls are generated.
"},{"location":"#running-models","title":"Running models","text":"IREE offers a low level C API, as well as several sets of API bindings for compiling and running programs using various languages.
"},{"location":"#communication-channels","title":"Communication channels","text":" - GitHub issues: Feature requests, bugs, and other work tracking
- IREE Discord server: Daily development discussions with the core team and collaborators
- iree-discuss email list: Announcements, general and low-priority discussion
"},{"location":"#roadmap","title":"Roadmap","text":"IREE is in the early stages of development and is not yet ready for broad adoption. We use both GitHub Projects and GitHub Milestones to track progress.
-
Pronounced \"eerie\" and often styled with the emoji\u00a0\u21a9
"},{"location":"building-from-source/","title":"Building from source","text":"While IREE does offer binary distributions for its compiler tools and Python bindings, building from source is still useful when using IREE's runtime or when making changes to the compiler or import tools themselves.
"},{"location":"building-from-source/#reference-pages","title":"Reference pages","text":" - Getting started
- Android cross-compilation
- iOS cross-compilation
- RISC-V cross-compilation
"},{"location":"building-from-source/android/","title":"Android cross-compilation","text":"Running on a platform like Android involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific Android version and system architecture):
- IREE's compiler is built on the host and is used there to generate modules for the target
- IREE's runtime is built on the host for the target. The runtime is then either pushed to the target to run natively or is bundled into an Android APK
","tags":["Android"]},{"location":"building-from-source/android/#prerequisites","title":"Prerequisites","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-environment-setup","title":"Host environment setup","text":"You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.
","tags":["Android"]},{"location":"building-from-source/android/#install-android-ndk-and-adb","title":"Install Android NDK and ADB","text":"The Android Native Developer Kit (NDK) is needed to use native C/C++ code on Android. You can download it here, or, if you have installed Android Studio, you can follow this guide instead.
Note
Make sure the ANDROID_NDK
environment variable is set after installing the NDK.
ADB (the Android Debug Bridge) is also needed to communicate with Android devices from the command line. Install it following the official user guide.
","tags":["Android"]},{"location":"building-from-source/android/#configure-and-build","title":"Configure and build","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-configuration","title":"Host configuration","text":"Build and install on your host machine:
cmake -GNinja -B ../iree-build/ \\\n-DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n.\ncmake --build ../iree-build/ --target install\n
","tags":["Android"]},{"location":"building-from-source/android/#target-configuration","title":"Target configuration","text":"Build the runtime using the Android NDK toolchain:
Linux macOS Windows cmake -GNinja -B ../iree-build-android/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n-DANDROID_ABI=\"arm64-v8a\" \\\n-DANDROID_PLATFORM=\"android-29\" \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\ncmake --build ../iree-build-android/\n
cmake -GNinja -B ../iree-build-android/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n-DANDROID_ABI=\"arm64-v8a\" \\\n-DANDROID_PLATFORM=\"android-29\" \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\ncmake --build ../iree-build-android/\n
cmake -GNinja -B ../iree-build-android/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"%ANDROID_NDK%/build/cmake/android.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=\"%CD%/../iree-build/install/bin\" \\\n-DANDROID_ABI=\"arm64-v8a\" \\\n-DANDROID_PLATFORM=\"android-29\" \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\ncmake --build ../iree-build-android/\n
Note
See the Android NDK CMake guide and Android Studio CMake guide for details on configuring CMake for Android.
The specific ANDROID_ABI
and ANDROID_PLATFORM
used should match your target device.
","tags":["Android"]},{"location":"building-from-source/android/#running-android-tests","title":"Running Android tests","text":"Make sure you enable developer options and USB debugging on your Android device and can see your it when you run adb devices
, then run all tests through ctest:
# Build test dependencies\ncmake --build ../iree-build-android/ --target iree-test-deps\n\n# Ensure that your Android device is visible\nadb devices\n\n# Run tests\nctest --test-dir ../iree-build-android/ --output-on-failure\n
This will automatically upload build artifacts to the connected Android device, run the tests, then report the status back to your host machine.
","tags":["Android"]},{"location":"building-from-source/android/#running-tools-directly","title":"Running tools directly","text":"Invoke the host compiler tools to produce a bytecode module FlatBuffer:
../iree-build/install/bin/iree-compile \\\n--iree-hal-target-backends=vmvx \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
Push the Android runtime tools to the device, along with any FlatBuffer files:
adb push ../iree-build-android/tools/iree-run-module /data/local/tmp/\nadb shell chmod +x /data/local/tmp/iree-run-module\nadb push /tmp/simple_abs_vmvx.vmfb /data/local/tmp/\n
Run the tool:
adb shell /data/local/tmp/iree-run-module --device=local-task \\\n--module=/data/local/tmp/simple_abs_vmvx.vmfb \\\n--function=abs \\\n--input=\"f32=-5\"\n
","tags":["Android"]},{"location":"building-from-source/getting-started/","title":"Getting started","text":""},{"location":"building-from-source/getting-started/#prerequisites","title":"Prerequisites","text":"IREE can be built from source using CMake. We also recommend the Ninja CMake generator and the clang or MSVC C/C++ compilers.
Note - Other CMake generators and compilers IREE developers and CIs primarily use Ninja, clang, and MSVC. Other configurations (including the Makefile generator and gcc) are \"best effort\". Patches to improve support are always welcome.
Linux macOS Windows -
Install a compiler/linker (typically \"clang\" and \"lld\" package)
-
Install CMake (typically \"cmake\" package)
-
Install Ninja (typically \"ninja-build\" package)
On Debian/Ubuntu:
sudo apt install cmake ninja-build clang lld\n
-
Install CMake
-
Install Ninja
If using Homebrew:
brew install cmake ninja\n
-
Install MSVC from Visual Studio or \"Tools for Visual Studio\" on the official downloads page
-
Install CMake from the official downloads page
-
Install Ninja from the official site
Note
Initialize MSVC by running vcvarsall.bat
to build on the command line. See the official documentation for details.
"},{"location":"building-from-source/getting-started/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":"Use Git to clone the IREE repository and initialize its submodules:
git clone https://github.com/openxla/iree.git\ncd iree\ngit submodule update --init\n
The most basic CMake workflow is:
# Configure\ncmake -G Ninja -B ../iree-build/ .\n\n# Build\ncmake --build ../iree-build/\n
Caution - slow builds
The compiler build is complex. You will want a powerful machine and to tune the settings following the next section. In 2023, we've seen builds take around 5-10 minutes on 64-core Linux machines.
Use case permitting, disabling the compiler build with -DIREE_BUILD_COMPILER=OFF
will drastically simplify the build.
"},{"location":"building-from-source/getting-started/#configuration-settings","title":"Configuration settings","text":"The configure step should be customized for your build environment. These settings can improve compile and link times substantially.
Linux macOS Windows # Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DIREE_ENABLE_ASSERTIONS=ON \\\n-DIREE_ENABLE_SPLIT_DWARF=ON \\\n-DIREE_ENABLE_THIN_ARCHIVES=ON \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DIREE_ENABLE_LLD=ON\n
# Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DIREE_ENABLE_ASSERTIONS=ON \\\n-DIREE_ENABLE_SPLIT_DWARF=ON \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DIREE_ENABLE_LLD=ON\n
It is also possible to add -DIREE_ENABLE_THIN_ARCHIVES=ON
if the CMAKE_AR
variable is defined and points to the path of either the GNU binutils or LLVM ar
program, overriding the default Apple ar
.
# Recommended development options:\ncmake -G Ninja -B ../iree-build/ -S . \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DIREE_ENABLE_ASSERTIONS=ON\n
Tip - CMAKE_BUILD_TYPE values We recommend using the RelWithDebInfo
build type by default for a good balance of debug info and performance. The Debug
, Release
, and MinSizeRel
build types are useful in more specific cases. Note that several useful LLVM debugging features are only available in Debug
builds. See the official CMake documentation for general details.
Tip - Faster recompilation with ccache We recommend using ccache
with CMake, especially when rebuilding the compiler. To use it, configure CMake with:
-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache\n
See also our developer documentation for ccache.
"},{"location":"building-from-source/getting-started/#optional-components","title":"Optional components","text":"By default, the CMake build includes:
- All compiler targets (
llvm-cpu
, cuda
, vulkan-spirv
, etc.) - All runtime HAL drivers (
local-task
, cuda
, vulkan
, etc.) - All compiler input formats (StableHLO, TOSA, etc.)
- All compiler output formats (VM bytecode, C)
The default build does not include:
- Compiler or runtime bindings (Python, TFLite, etc.)
- Advanced features like AddressSanitizer or tracing instrumentation
- Experimental components
These can be changed via the IREE_
CMake options listed in the root CMakeLists.txt
.
"},{"location":"building-from-source/getting-started/#extensions-and-integrations","title":"Extensions and integrations","text":"When using IREE within other projects, you can register compiler plugins and runtime HAL drivers. You can also bring your own copy of LLVM and some other tools. See the root CMakeLists.txt
for details.
"},{"location":"building-from-source/getting-started/#tests-and-samples","title":"Tests and samples","text":""},{"location":"building-from-source/getting-started/#running-tests","title":"Running tests","text":"Tests are run via ctest. To build and run the core project tests:
# Build default targets\ncmake --build ../iree-build/\n\n# Run tests\nctest --test-dir ../iree-build/\n
Caution
This has two limitations:
- Large tests are excluded from the build by default
- Some tests require hardware like a GPU and will fail on unsupported systems
To build and then run all tests:
# 1. Build default targets\ncmake --build ../iree-build/\n\n# 2. Build test dependencies\ncmake --build ../iree-build/ --target iree-test-deps\n\n# 3. Run tests\nctest --test-dir ../iree-build/\n\n\n# Or combine all steps using a utility target\ncmake --build ../iree-build --target iree-run-tests\n
To run only certain tests, we have a helper script that converts environment variables into ctest filters:
# Run default tests\n./build_tools/cmake/ctest_all.sh ../iree-build\n\n# Run tests, turning CUDA on and Vulkan off\nexport IREE_CUDA_DISABLE=0\nexport IREE_VULKAN_DISABLE=1\n./build_tools/cmake/ctest_all.sh ../iree-build\n
"},{"location":"building-from-source/getting-started/#running-samples","title":"Running samples","text":"# Build\ncmake --build ../iree-build/\n\n# Run a standalone sample application\n../iree-build/runtime/src/iree/runtime/demo/hello_world_embedded\n# 4xf32=1 1.1 1.2 1.3\n# *\n# 4xf32=10 100 1000 10000\n# =\n# 4xf32=10 110 1200 13000\n\n# Try out the developer tools\nls ../iree-build/tools/\n../iree-build/tools/iree-compile --help\n../iree-build/tools/iree-run-module --help\n
"},{"location":"building-from-source/getting-started/#python-bindings","title":"Python bindings","text":"Python packages can either be built from source or installed from our releases. See the Python bindings page for details about the bindings themselves.
"},{"location":"building-from-source/getting-started/#dependencies","title":"Dependencies","text":"You will need a recent Python installation >=3.9 (we aim to support non-eol Python versions).
Tip - Managing Python versions Make sure your 'python' is what you expect:
Linux macOS Windows Note that on multi-python systems, this may have a version suffix, and on many Linuxes where python2 and python3 can co-exist, you may also want to use python3
.
which python\npython --version\n
Note that on multi-python systems, this may have a version suffix, and on macOS where python2 and python3 can co-exist, you may also want to use python3
.
which python\npython --version\n
The Python launcher for Windows (py
) can help manage versions.
which python\npython --version\npy --list-paths\n
Tip - Virtual environments We recommend using virtual environments to manage python packages, such as through venv
(about, tutorial):
Linux macOS Windows python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\n.venv\\Scripts\\activate.bat\n
When done, run deactivate
.
# Upgrade PIP before installing other requirements\npython -m pip install --upgrade pip\n\n# Install IREE build requirements\npython -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
"},{"location":"building-from-source/getting-started/#building-with-cmake","title":"Building with CMake","text":"To build the Python bindings, configure CMake with the IREE_BUILD_PYTHON_BINDINGS
option. We also recommend explicitly setting which Python executable to use with Python3_EXECUTABLE
:
# Configure (including other options as discussed above)\ncmake -G Ninja -B ../iree-build/ \\\n-DIREE_BUILD_PYTHON_BINDINGS=ON \\\n-DPython3_EXECUTABLE=\"$(which python)\" \\\n.\n\n# Build\ncmake --build ../iree-build/\n
"},{"location":"building-from-source/getting-started/#using-the-python-bindings","title":"Using the Python bindings","text":"Extend your PYTHONPATH
with IREE's bindings/python
paths and try importing:
Linux macOS Windows source ../iree-build/.env && export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n# iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n
source ../iree-build/.env && export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n# iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n
..\\iree-build\\.env.ps1 # or ..\\iree-build\\.env.bat\n# The 'PYTHONPATH' environment variable should now contain\n# iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n
Using IREE's ML framework importers requires a few extra steps:
# Install test requirements\npython -m pip install -r integrations/tensorflow/test/requirements.txt\n\n# Install pure Python packages (no build required)\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n\n# Then test the tools:\niree-import-tf --help\niree-import-tflite --help\n
"},{"location":"building-from-source/ios/","title":"iOS cross-compilation","text":"Cross-compilation for iOS consists of the two steps below.
- On the macOS host, build the IREE compiler. We can run it to create IREE modules.
- Build the IREE runtime on the macOS host for iOS devices and the simulator. We can then run the IREE module on the simulator.
","tags":["iOS"]},{"location":"building-from-source/ios/#prerequisites","title":"Prerequisites","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#install-xcode-and-ios-sdk","title":"Install Xcode and iOS SDK","text":"For cross-compilation, you need Xcode. It comes with the SDKs for iOS devices and the simulator, as well as the simctl
tool for controlling the simulator from the command line.
","tags":["iOS"]},{"location":"building-from-source/ios/#host-environment-setup","title":"Host environment setup","text":"On your host platform, you should already be able to build IREE from source. Please make sure you've gone through the steps in getting started.
","tags":["iOS"]},{"location":"building-from-source/ios/#configure-and-build","title":"Configure and build","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#build-the-iree-compiler-for-the-host","title":"Build the IREE compiler for the Host","text":"Build and install on your macOS host:
cmake -S . -B ../iree-build/ -GNinja \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DCMAKE_INSTALL_PREFIX=../iree-build/install\n\ncmake --build ../iree-build/ --target install\n
","tags":["iOS"]},{"location":"building-from-source/ios/#cross-compile-the-iree-runtime-for-ios","title":"Cross-compile the IREE runtime for iOS","text":"Build the runtime for the iOS Simulator.
cmake -S . -B ../build-ios-sim -GNinja \\\n-DCMAKE_SYSTEM_NAME=iOS \\\n-DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphonesimulator Path) \\\n-DCMAKE_OSX_ARCHITECTURES=arm64 \\\n-DCMAKE_SYSTEM_PROCESSOR=arm64 \\\n-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 \\\n-DCMAKE_IOS_INSTALL_COMBINED=YES \\\n-DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n-DCMAKE_INSTALL_PREFIX=../build-ios-sim/install \\\n-DIREE_BUILD_COMPILER=OFF\n\ncmake --build ../build-ios-sim --config Release --target install\n
Or, we can build the runtime for iOS devices by changing the value of the -DCMAKE_OSX_SYSROOT
option to:
-DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphoneos Path)\n
","tags":["iOS"]},{"location":"building-from-source/ios/#running-iree-modules-on-the-ios-simulator","title":"Running IREE modules on the iOS Simulator","text":"Run the IREE compiler on the host to generate a module.
../iree-build/install/bin/iree-compile \\\n--iree-hal-target-backends=vmvx \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
We could test the generated module by running the macOS version of iree-run-module
on the host.
../iree-build/install/bin/iree-run-module \\\n--module=/tmp/simple_abs_vmvx.vmfb \\\n--device=local-task \\\n--function=abs \\\n--input=\"f32=-5\"\n
To run it on the iOS simulator, we need to copy the vmfb file into the iree-run-module
iOS app bundle.
cp /tmp/simple_abs_vmvx.vmfb \\\n../build-ios-sim/install/bin/iree-run-module.app/\n
Open the iOS Simulator Manager on the host.
open -a Simulator\n
After creating and booting a simulator in this app, you can list it from the command-line.
xcrun simctl list devices | grep Booted\n
This is what should come out of the command:
iPhone 14 Pro (12341234-ABCD-ABCD-ABCD-123412341234) (Booted)\n
where iPhone 14 Pro
is the device being simulated and 12341234-ABCD-ABCD-ABCD-123412341234
is the simulator's unique device ID (UDID).
Install the app iree-run-module
on the simulator, given its UDID.
xcrun simctl install <UDID> ../build-ios-sim/install/bin/iree-run-module.app\n
Check the path to the installed bundle, where the simple_abs_vmvx.vmfb
module should be found.
ls $(xcrun simctl get_app_container <UDID> dev.iree.iree-run-module)\n
The string dev.iree.iree-run-module
is the bundle identifier of the iOS app. The CMake building process generates it and saves it in the property list (plist) file ../build-ios-sim/install/bin/iree-run-module.app/Info.plist
.
Launch the iree-run-module
app on the simulator to run the IREE module simple_abs_vmvx.vmfb
.
xcrun simctl launch --console \\\n<UDID> \\\ndev.iree.iree-run-module \\\n--device=local-task \\\n--function=abs \\\n--input=\"f32=-5\" \\\n--module=$(xcrun simctl get_app_container <UDID> dev.iree.iree-run-module)/simple_abs_vmvx.vmfb\n
","tags":["iOS"]},{"location":"building-from-source/riscv/","title":"RISC-V cross-compilation","text":"Running on a platform like RISC-V involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific RISC-V CPU architecture and operating system):
- IREE's compiler is built on the host and is used there to generate modules for the target
- IREE's runtime is built on the host for the target. The runtime is then pushed to the target to run natively.
","tags":["CPU"]},{"location":"building-from-source/riscv/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-environment-setup","title":"Host environment setup","text":"You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.
","tags":["CPU"]},{"location":"building-from-source/riscv/#install-risc-v-cross-compile-toolchain-and-emulator","title":"Install RISC-V cross-compile toolchain and emulator","text":"You'll need a RISC-V LLVM compilation toolchain and a RISC-V enabled QEMU emulator.
See instructions in the following links
- Clang getting started
- RISC-V GNU toolchain
- QEMU
- RISC-V Linux QEMU
Note
The RISCV_TOOLCHAIN_ROOT
environment variable needs to be set to the root directory of the installed GNU toolchain when building the RISC-V compiler target and the runtime library.
","tags":["CPU"]},{"location":"building-from-source/riscv/#install-prebuilt-risc-v-tools-risc-v-64-bit-linux-toolchain","title":"Install prebuilt RISC-V tools (RISC-V 64-bit Linux toolchain)","text":"Execute the following script to download the prebuilt RISC-V toolchain and QEMU from the IREE root directory:
./build_tools/riscv/riscv_bootstrap.sh\n
Note
The prebuilt toolchain is built with an AlmaLinux release 8.8 Docker image. It requires glibc >= 2.28 on your host machine.
","tags":["CPU"]},{"location":"building-from-source/riscv/#support-vector-extension","title":"Support vector extension","text":"For RISC-V vector extensions support, see additional instructions
","tags":["CPU"]},{"location":"building-from-source/riscv/#configure-and-build","title":"Configure and build","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-configuration","title":"Host configuration","text":"Build and install on your host machine:
cmake -GNinja -B ../iree-build/ \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n.\ncmake --build ../iree-build/ --target install\n
","tags":["CPU"]},{"location":"building-from-source/riscv/#target-configuration","title":"Target configuration","text":"The following instruction shows how to build for a RISC-V 64-bit Linux machine. For other RISC-V targets, please refer to riscv.toolchain.cmake as a reference of how to set up the cmake configuration.
","tags":["CPU"]},{"location":"building-from-source/riscv/#risc-v-64-bit-linux-target","title":"RISC-V 64-bit Linux target","text":"cmake -GNinja -B ../iree-build-riscv/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"./build_tools/cmake/riscv.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=$(realpath ../iree-build/install/bin) \\\n-DRISCV_CPU=linux-riscv_64 \\\n-DIREE_BUILD_COMPILER=OFF \\\n-DRISCV_TOOLCHAIN_ROOT=${RISCV_TOOLCHAIN_ROOT} \\\n-DIREE_ENABLE_CPUINFO=OFF \\\n.\ncmake --build ../iree-build-riscv/\n
","tags":["CPU"]},{"location":"building-from-source/riscv/#running-iree-bytecode-modules-on-the-risc-v-system","title":"Running IREE bytecode modules on the RISC-V system","text":"Note
The following instructions are meant for the RISC-V 64-bit Linux target. For the bare-metal target, please refer to simple_embedding to see how to build a ML workload for a bare-metal machine.
Set the path to qemu-riscv64 Linux emulator binary in the QEMU_BIN
environment variable. If it is installed with riscv_bootstrap.sh
, the path is default at ${HOME}/riscv/qemu/linux/RISCV/bin/qemu-riscv64.
export QEMU_BIN=<path to qemu-riscv64 binary>\n
Invoke the host compiler tools to produce a bytecode module FlatBuffer:
../iree-build/install/bin/iree-compile \\\n--iree-hal-target-backends=vmvx \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
Run the RISC-V emulation:
${QEMU_BIN} \\\n-cpu rv64 \\\n-L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n../iree-build-riscv/tools/iree-run-module \\\n--device=local-task \\\n--module=/tmp/simple_abs_vmvx.vmfb \\\n--function=abs \\\n--input=f32=-5\n
","tags":["CPU"]},{"location":"building-from-source/riscv/#optional-configuration","title":"Optional configuration","text":"RISC-V Vector extensions allows SIMD code to run more efficiently. To enable the vector extension for the compiler toolchain and the emulator, build the tools from the following sources:
- RISC-V toolchain is built from https://github.com/llvm/llvm-project.
- Currently, the LLVM compiler is built on GNU toolchain, including libgcc, GNU linker, and C libraries. You need to build GNU toolchain first.
- Clone GNU toolchain from: https://github.com/riscv/riscv-gnu-toolchain. Switch the \"riscv-binutils\" submodule to
git://sourceware.org/git/binutils-gdb.git
manually.
- RISC-V QEMU is built from https://gitlab.com/qemu-project/qemu/tree/v8.1.2.
The SIMD code can be generated following the IREE CPU flow with the additional command-line flags
tools/iree-compile \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-llvmcpu-target-triple=riscv64 \\\n--iree-llvmcpu-target-abi=lp64d \\\n--iree-llvmcpu-target-cpu-features=\"+m,+a,+f,+d,+zvl512b,+v\" \\\n--riscv-v-fixed-length-vector-lmul-max=8 \\\niree_input.mlir -o mobilenet_cpu.vmfb\n
Then run on the RISC-V QEMU:
${QEMU_BIN} \\\n-cpu rv64,Zve64d=true,vlen=512,elen=64,vext_spec=v1.0 \\\n-L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n../iree-build-riscv/tools/iree-run-module \\\n--device=local-task \\\n--module=mobilenet_cpu.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
","tags":["CPU"]},{"location":"community/","title":"Community projects","text":"Projects built by community members:
-
The SHARK project from nod.ai uses a forked version of IREE (SHARK-Runtime), offering highly tuned performance on a large corpus of machine learning programs.
-
The IREE Bare-Metal Arm Sample shows how to build IREE with the Arm GNU Toolchain for bare-metal Arm targets using the open-source firmware libraries CMSIS and libopencm3.
-
The IREE C++ Template shows one way to integrate IREE's runtime into a project with CMake.
Official repositories:
-
iree-jax is home to IREE's support for JAX programs.
-
iree-torch contains IREE's PyTorch frontend, leveraging the torch-mlir project.
-
iree-samples includes various samples and prototypes built with IREE.
-
iree-llvm-sandbox contains experimental work by the IREE team closely related to LLVM and MLIR, usually with the aim of contributing back to those upstream projects.
"},{"location":"community/tags/","title":"Tags","text":"Website pages sorted by tag:
"},{"location":"community/tags/#android","title":"Android","text":" - Android cross-compilation
- Android LLDB debugging
"},{"location":"community/tags/#cpu","title":"CPU","text":" - RISC-V cross-compilation
- Matrix Multiplication with MMT4D
- Profiling CPUs
- CPU - Bare-Metal
- CPU
"},{"location":"community/tags/#cuda","title":"CUDA","text":" - CUDA backend
- CUDA backend design
- GPU - CUDA
"},{"location":"community/tags/#gpu","title":"GPU","text":" - CUDA backend
- Vulkan environment setup
- CUDA backend design
- Profiling GPUs using Vulkan
- GPU - CUDA
- GPU - Metal
- GPU - ROCm
- GPU - Vulkan
"},{"location":"community/tags/#jax","title":"JAX","text":" - JAX
- Extensions
- Glossary
"},{"location":"community/tags/#pytorch","title":"PyTorch","text":" - PyTorch
- Extensions
- Glossary
"},{"location":"community/tags/#python","title":"Python","text":" - JAX
- PyTorch
- TensorFlow
- TensorFlow Lite
- Python
"},{"location":"community/tags/#tensorflow","title":"TensorFlow","text":" - TFLite support via TOSA
- TensorFlow
- TensorFlow Lite
- Extensions
- Glossary
"},{"location":"community/tags/#vulkan","title":"Vulkan","text":" - Vulkan environment setup
- Profiling GPUs using Vulkan
- GPU - Vulkan
"},{"location":"community/tags/#web","title":"Web","text":" - Building with Emscripten
"},{"location":"community/tags/#ios","title":"iOS","text":" - iOS cross-compilation
- GPU - Metal
"},{"location":"community/blog/","title":"Blog","text":"Updates from the IREE team
"},{"location":"community/blog/2021-10-15-cuda-backend/","title":"CUDA backend","text":"IREE is being designed with re-targetability as a core goal: it should be possible to use IREE to target a broad spectrum of power regimes, from embedded systems to distributed clusters; and it should be possible to extend IREE to target new back-ends without having to reinvent the wheel each time.
To explore this, we recently branched out from our initial focus on low-latency mobile deployments with a goal of using IREE to target data center workloads on Nvidia CUDA. This post describes how we quickly brought up a CUDA back-end for IREE and used it to train BERT, then shares some metrics and next steps.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#bring-up","title":"Bring up","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#hal-support","title":"HAL support","text":"IREE has a HAL API that abstract all the targets behind a common interface. The first step to supporting a CUDA target was to map the HAL API onto CUDA. We use the CUDA driver API to reduce dependencies and be closer to the hardware. The HAL API is based on other GPU APIs like Vulkan and Metal, so it was a natural fit for CUDA. The HAL API exposes memory allocations, basic fill and memset commands, kernel dispatch, and general command buffer handling. The original implementation uses the CUDA graph API as a graph maps naturally to command buffers. There is also an implementation using CUDA streams for comparison.
HAL exposes an API that can be tested independently, even if we are not able to create CUDA kernels yet we can test a large portion of the CUDA driver using CTS tests. Those can be run to make sure a system has the required CUDA support.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#compiler-support","title":"Compiler support","text":"CUDA has an open source backend in LLVM generating PTX that we are leveraging. Therefore IREE can create NVVM (CUDA LLVM variant) and use LLVM's backend to generate PTX. The CUDA driver will do the \"last mile compilation\" at runtime to convert PTX into the GPU's native ISA.
IREE compiler pipeline starts from linalg with tensor operands. A large part of the compiler is independent of the target.
The linalg on tensor representation of the graph is broken up into dispatch regions that are processed by NVVM Codegen. A simple implementation of the compiler is to run bufferization and convert linalg to standard followed by conversion to NVVM/LLVM. Most of those transformation can re-use upstream MLIR transformations and share it with any other backend targeting LLVM IR. Leveraging MLIR conversion to LLVM will allow us to quickly go from a simple \"hello world\" to supporting full models.
IREE code generation is based on MLIR infrastructure so each step can easily be tested independently using the MLIR lit framework.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#flatbuffer-definition","title":"FlatBuffer definition","text":"Kernels are encoded in a FlatBuffer containing the PTX code as well as the workgroup size to use for the dispatch. This allows serialization of the kernels in the IR, it is then de-serialized by the HAL layer.
table CUDAExecutableDef {\n // A map of entry point ordinals to string names as used in the shader\n // library.\n entry_points:[string];\n\n // Block sizes for each entry point.\n block_sizes:[CUDABlockSizeDef];\n\n // PTX string of the module.\n ptx_image:string;\n}\n
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#hello-world","title":"Hello world","text":"Together those 3 steps are enough to provide most of the functionality and we can now successfully compile full models.
The steps to reproduce running a simple op end to end through CUDA backend are described here.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#performance","title":"Performance","text":"Now that we have enabled functionality we need to look at the performance. Once again we can leverage existing MLIR transformations to speed up the developement work.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#tiling-and-distribution","title":"Tiling and distribution","text":"The first obvious step to get efficient code on CUDA is to make sure we distribute the work on enough blocks and threads to fill up the GPU. At the time of bring up not all ops were being tiled and distributed in the common IREE layer. During dispatch region creation we apply tile and fuse which will distribute the work into a set of workgroups that are mapped to CUDA blocks.
At the beginning of the code generation we look at the dispatch region and decide on the tile size for a workgroup. For CUDA we also decide the number of threads per block. We will then have a pass tiling the ops in the dispatch region a second time to distribute the work onto threads within the block.
At this stage the IR looks like the following:
%8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -> index\n %9 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%8]\n %10 = memref.subview %in0[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %11 = memref.subview %in1[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %12 = memref.subview %out[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n linalg.generic {\n indexing_maps = [affine_map<(d0) -> (d0)>,\n affine_map<(d0) -> (d0)>,\n affine_map<(d0) -> (d0)>],\n iterator_types = [\"parallel\"]}\n ins(%10, %11 :\n memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>,\n memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>)\n outs(%12 : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>) {\n ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors\n %13 = addf %arg1, %arg2 : f32\n linalg.yield %13 : f32\n }\n
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#vectorization","title":"Vectorization","text":"Even though GPUs execute most operations as scalar, memory operations are optimized to access 128 bits of data per thread. Therefore it is critical to vectorize load/store operations. After tiling to a size we vectorize the IR to get vector read/write mapping to load4/store4. This significantly improves the memory access pattern of the code generated.
This convert the previous IR to:
%8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -> index\n %9 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%8]\n %10 = memref.subview %in0[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %11 = memref.subview %in1[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %12 = memref.subview %out[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %13 = vector.transfer_read %10[%c0], %cst {in_bounds = [true]} : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>, vector<4xf32>\n %14 = vector.transfer_read %11[%c0], %cst {in_bounds = [true]} : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>, vector<4xf32>\n %15 = addf %13, %14 : vector<4xf32>\n vector.transfer_write %15, %12[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#shared-memory-optimization","title":"Shared memory optimization","text":"Nvidia GPUs have a fast shared memory that needs to be leveraged to optimize cases where we may be memory bound and have the potential to re-use memory reads.
For operations like GEMM using shared memory gives us a significant speed up. We leverage memory promotion, vector distribution and software pipelining transformations from MLIR to generate efficient copies from global to shared memory that can be interleaved with the compute work.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#optimization-pipeline","title":"Optimization pipeline","text":"Those different transformations compose to this flow:
The full dump step by step of a linalg.matmul operation can be found here.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#results-and-next-steps","title":"Results and next steps","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#gemm","title":"GEMM","text":"We compare the performance of a single GEMM operation to highly optimized library cuBLAS using mmperf framework.
The graph can be reproduced based on instructions on mmperf
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#future-work","title":"Future work","text":"Nod.ai has contributed an experimental HAL module for ROCM that allows us to re-use the compiler parts to support ROCM, more support is going to be added in the future.
Several performance improvements are still in progress, including optimizing the runtime allocator to reduce the host-side overhead and tuning tile sizes based on profiling.
Several models are running and we will publish more detailed benchmark results in the near future.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/","title":"Matrix Multiplication with MMT4D","text":"","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#introduction","title":"Introduction","text":"Matrix multiplication (matmul) is an important operation in ML workloads that poses specific challenges to code generation. For example, matmul makes repeated accesses to the same data, which makes locality of reference a top concern.
Moreover, modern CPUs instruction set architectures (ISAs) offer specialized SIMD instructions that the matmul implementation needs to use to achieve optimal performance, and these instructions expect data to be in a particular layout.
This article is about an in-development MLIR operation, linalg.mmt4d
, offering a compilation path for linalg.matmul
that is designed from the ground up for these efficiency considerations.
We are still in the early implementation phase of this linalg.mmt4d
plan, but we feel confident that we know where we are going because what we are really doing here is importing into the compiler what we have learned working on optimized matrix multiplication libraries, particularly Ruy. We know what loop schedule and kernel we want the compiler to generate \u2014 essentially the same as we wrote in Ruy, give or take additional optimizations such as fusions and constant folding that become possible now that we are doing this within a compiler. This allows us to focus on how we get the compiler to generate that schedule and kernel with purely algebraic transformations that compose and enable further compiler optimizations.
At the basis of this work is the extensible op system of the Linalg dialect in the MLIR compiler toolkit. In this case, a general purpose, mixed precision mmt4d op is defined via a high level description directly in the compiler and is then available to both users of the compiler (as a linalg.mmt4d
op) or for direct emission via Python based IR construction (i.e. for direct integration into high level frameworks without rebuilding the compiler). The ability to define such new special forms cheaply, and without any systemic framework level cost, is part of the extensibility and composition story that we expect will become increasingly important in development and deployment scenarios in the future, and in this case, it let us spring board off of high quality code generation which was already well integrated and composed well with other features of the compiler.
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#existing-matrix-multplication-code-generation","title":"Existing Matrix Multplication Code Generation","text":"Let us start by discussing IREE\u2019s existing matmul code generation and highlight the issues that mmt4d
aims to overcome.
The existing approach operates in-place on the source matrices. When we discuss \"tiling\" in this paragraph, we refer exclusively to the traversal \u2014 how these source matrices are traversed by the matmul loop. There is no \"tiled layout\" here, which will be the key difference with mmt4d
below.
The destination matrix is tiled into workgroups (CPU threads) tiles, then each workgroup tile is tiled to fit some level of CPU cache, and finally each tile is further tiled to fit target architecture registers (e.g. 8x8).
That multi-level tiling means that the code works like the following loop nest:
def tiled_matmul(A, B, C, tile_m, tile_n, tile_k, tile_m_v, tile_n_v, tile_k_v):\n m = A.shape[0]\n k = A.shape[1]\n n = B.shape[1]\n for m1 in range(0, m, tile_m):\n for n1 in range(0, n, tile_n):\n for k1 in range(0, k, tile_k):\n # First level of tiling views...\n lhs_tile = A[m1:m1+tile_m, k1:k1+tile_k]\n rhs_tile = B[k1:k1+tile_k, n1:n1+tile_n]\n dst_tile = C[m1:m1+tile_m, n1:n1+tile_n]\n for mv in range(0, tile_m, tile_m_v):\n for nv in range(0, tile_n, tile_n_v):\n for kv in range(0, tile_k, tile_k_v):\n # Register tiling views...\n lhs_tile_v = lhs_tile[mv:mv+tile_m_v, kv:kv+tile_k_v]\n rhs_tile_v = rhs_tile[kv:kv+tile_k_v, nv:nv+tile_n_v]\n # kernel.\n dst_tile[mv:mv+tile_m_v, nv:nv+tile_n_v] += np.matmul(lhs_tile_v, rhs_tile_v)\n return C\n
The two main problems with this approach are:
-
Overhead to meet SIMD ISA layout requirements: In practice, the kernel needs to use specific SIMD instructions to perform the arithmetic. They expect small tiles of the matrices to be loaded in registers, in a specific layout. If the matrix data wasn't already stored in memory in such a tiled layout, then the kernel has to perform such a data rearrangement on the fly, incurring substantial overhead. For NxN matrix multiplication, the kernel performs O(N3) work on O(N2) data, so doing that rearrangement there means O(N3) overhead where O(N2) should have sufficed, as this could have been done as a pre-processing step on O(N2) data.
-
Inefficient memory traversal: For efficiency reasons, we always need tile_m_v>1
and tile_n_v>1
. That is because the higher these values, the fewer memory-load instructions are needed overall; and this is also dictated by the SIMD instructions that we want to use. But that means that the kernel is accessing simultaneously multiple rows or columns of the left-hand and right-hand side matrices. And in this existing approach, they are stored in linear layout, not in a tiled layout, so these accesses are not contiguous in memory. This is detrimental to memory access performance, meaning the CPU caches, in multiple ways. One is that these multiple non-contiguous accesses may alias each other in the L1 cache because of low associativity.
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#matrix-multiplication-operation-with-4d-tiled-operands","title":"Matrix Multiplication Operation With 4D Tiled Operands","text":"For the reasons above, an efficient matmul implementation must reorder data into a tiled layout matching the target SIMD ISA and making the memory access patterns as contiguous as possible.
IREE/MLIR defaults to bufferizing all tensors into a \"row-major\" order, meaning that the last-enumerated dimension is the one that is contiguous in memory. As we prefer not to write custom bufferization code, we can't specify an alternative layout for a tensor. Fortunately, it is possible to represent a 2D tiled layout as a 4D layout. For example, tensor<2x2x2x2xf32>
can represent a 4x4 matrix made of 2x2 tiles, each of which is 2x2. The row-major layout on tensor<2x2x2x2xf32>
makes each 2x2 tile contiguous and row-major, and arranges the 2x2 tiles themselves into a row-major 2x2 layout in the overall 4x4 matrix.
Such a row-major-tiled layout is exactly what we need for the left-hand side of a matrix multiplication, because matrix multiplication traverses the left-hand side matrix row by row. But for the right-hand side matrix, we want a column-major-tiled layout. To solve this problem, we decide to implement not matrix multiplication, but matrix-multiplication-by-transposed-right-hand-side which is where the t
in the linalg.mmt4d
came from. Now such an op is happy with both the left and right-hand sides being row-major-tiled.
The following example illustrates that. In these diagrams, each matrix element is rendered its memory offset.
To compute the 2x2 block in the destination matrix, we will have to load two yellow blocks from LHS, RHS matrices respectively compute their matmul results (i.e. call the kernel), then the two blue blocks, and so on. As we can see, each tile loads data that is not contiguous. It would be better if we rearranged the elements in the following layout:
Now tiles are stored contiguously in memory and the kernel can simply load them from memory into the registers that will be directly consumed by the SIMD instructions performing the multiplications. Moreover, the kernel is now loading from just two contiguous data streams, a simple memory access pattern which is sure to be efficient (regarding caches, etc) on any reasonable target hardware.
We introduce a linalg.mmt4d
operation that performs such a matrix multiplication on matrices in a tiled layout represented as 4D tensors. That leaves the question of how to represent, within the linalg dialect, the conversions between ordinary matrices represented as 2D tensors, and these tiled matrices represented as 4D tensors. Moreover, these conversions should be tileable and decompose well. Thankfully, the transformation from 2D to 4D can be written as a reshape followed by a transpose as in the following diagram:
So we can think of the outermost two dimensions of the 4D representations as the tile position in the overall matrix, and the innermost two as the element position within one tile. Hopefully the following Python pseudocode makes it more concrete:
def pack_2d_4d(operand, parallel_size, reduction_size):\n i1 = operand.shape[0] // parallel_size # M1\n i2 = parallel_size # M0\n j1 = operand.shape[1] // reduction_size # K1\n j2 = reduction_size # K0\n operand_4d = np.reshape(operand, [i1, i2, j1, j2])\n return np.transpose(operand_4d, [0, 2, 1, 3]) # [M1, K1, M0, K0]\n
Now the mmt4d operation will follow a structure as the multi level tiling, for simplicity we considered the case here where no L1 tiling is required only first level of distribution to workgroups:
def mmt4d(A, B, C, M0, N0, K0):\n M = A.shape[0]\n N = B.shape[1]\n Bt = np.transpose(B, [1, 0])\n A4d = pack_2d_4d(A, M0, K0)\n Bt4d = pack_2d_4d(Bt, N0, K0)\n M1 = A4d.shape[0]\n N1 = Bt4d.shape[0]\n K1 = A4d.shape[1]\n for m1 in range(0, M1):\n for n1 in range(0, N1):\n for k1 in range(0, K1):\n # Tile views that are contiguous in memory.\n lhs_tile = np.reshape(A4d[m1, k1, :, :], [M0, K0])\n rhs_tile = np.reshape(Bt4d[n1, k1, :, :], [N0, K0])\n # Inner kernel.\n C[m1, n1, :, :] += np.matmul(lhs_tile, np.transpose(rhs_tile, [1, 0]))\n # 4d -> 2D\n C2d = unpack_4d_2d(C)\n return C2d\n
The resulting 4D tiled matrix still needs to be rearranged back to the original layout as a 2D tensor:
def unpack_4d_2d(operand):\n i1 = operand.shape[0] # M1\n j1 = operand.shape[1] # N1\n i2 = operand.shape[2] # M0\n j2 = operand.shape[3] # N0\n operand_transposed = operand.transpose([0, 2, 1, 3]) # [M1, M0, N1, N0]\n return operand_transposed.reshape([i1 * i2, j1 * j2]) # [M, N]\n
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#performance-results","title":"Performance Results","text":"We benchmarked various float32 matmul problems of different sizes and the result showed that mmt4d is faster than the existing matmul implementation for bigger matrices as we can see the in the following chart:
The SIMD instruction being used here is the simplest kind, a vector*scalar
multiplication, and the storage orders of the matrices allow the existing implementation to directly load the vectors from the source matrices without any rearrangement overhead. So this case is particularly friendly to the existing code, which is why the mmt4d code is only faster for bigger matrices. To understand why mmt4d is faster in that case, we collected statistics of L1 cache misses:
This shows that in this case, the better cache-friendliness of mmt4d, thanks to its simple contiguous memory access pattern, accounts for its higher performance.
As we proceed with increasingly sophisticated SIMD targets, starting with the dot-product instructions found in current mobile devices for the int8 case and going to become generalized to all data types all the way to float32 over the next few years with upcoming ARM SIMD instructions, the advantage of mmt4d will widen for all sizes, not just the larger ones.
Part of why we feel confident about the eventual performance that our approach will achieve is that, as mentioned in the introduction, we are rebuilding within the compiler an existing library's schedule and kernel, and we have benchmark results about it.
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#conclusion","title":"Conclusion","text":"We introduced a 4d tiled representation for 2d matrix-matrix multiplication with a decomposable algebric transformations that requires only reshape and transpose of input operands, we discussed and empirically showed how that solves major drawbacks in row-major linear matmul by providing a flexible way to match different ISA layout along with better cache locality achieving near peak performance.
As was mentioned in the introduction, this work is under active development and the next immediate steps are to prove the rest of the hypothesis by:
-
Handling dynamic sizes and padding to the next multiple of the target tile size.
-
Implementing the integer case (int32 += int8 * int8
).
-
Implementing the dispatch to different SIMD ISA variants at runtime.
-
Implementing cache-friendly traversal for larger matmuls and multi-threading by interfacing with IREE's runtime dispatch.
-
Improving the generated code by fusing the 4d tiled layout with the producers and consumers of the linalg.mmt4d
.
","tags":["CPU"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/","title":"TFLite support via TOSA","text":"IREE can now execute TensorFlow Lite (TFLite) models through the use of TOSA, an open standard of common tensor operations, and a part of MLIR core. TOSA\u2019s high-level representation of tensor operations provides a common front-end for ingesting models from different frameworks. In this case we ingest a TFLite FlatBuffer and compile it to TOSA IR, which IREE takes as an input format to compile to its various backends.
Using TFLite as a frontend for IREE provides an alternative ingestion method for already existing models that could benefit from IREE\u2019s design. This enables models already designed for on-device inference to have an alternative path for execution without requiring any additional porting, while benefiting from IREE\u2019s improvements in buffer management, work dispatch system, and compact binary format. With continued improvements to IREE/MLIR\u2019s compilation performance, more optimized versions can be compiled and distributed to target devices without an update to the clientside environment.
Today, we have validated floating point support for a variety of models, including mobilenet (v1, v2, and v3) and mobilebert. More work is in progress to support fully quantized models, and TFLite\u2019s hybrid quantization, along with dynamic shape support.
","tags":["TensorFlow"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/#examples","title":"Examples","text":"TFLite with IREE is available in Python and Java. We have a colab notebook that shows how to use IREE\u2019s python bindings and TFLite compiler tools to compile a pre-trained TFLite model from a FlatBuffer and run using IREE. We also have an Android Java app that was forked from an existing TFLite demo app, swapping out the TFLite library for our own AAR. More information on IREE\u2019s TFLite frontend is available here.
","tags":["TensorFlow"]},{"location":"developers/","title":"Developers","text":"These pages cover topics useful for project maintainers and contributors.
Caution
Some of these pages may be stale. Contributions are always welcome!
"},{"location":"developers/usage-best-practices/","title":"Usage best practices","text":"This page contains a list of best practices for getting the most out of IREE, spanning model authoring, ahead-of-time compilation, and runtime use. Treat these as a collection of ideas to consider or areas to start benchmarking when working on your own applications.
"},{"location":"developers/usage-best-practices/#introduction","title":"Introduction","text":"Common themes include:
- Give the compiler as much information as possible
- Give the compiler opportunities to batch work together or defer computation
- Keep compute devices saturated with work through pipelining
- Use dense math where possible, particularly for inner loop bodies
- Limit synchronization points between devices like CPUs and GPUs
- Profile early and often, using the right tools for each level of granularity
"},{"location":"developers/usage-best-practices/#practices-for-model-authoring","title":"Practices for model authoring","text":""},{"location":"developers/usage-best-practices/#track-state-within-your-model-when-possible","title":"Track state within your model when possible","text":"If your model is stateful prefer to store that state directly within your program rather than externalizing it through arguments and return values. By keeping state inside your program the compiler is better able to reason about it and function calls will have lower overhead.
If you do externalize state, try to pack that state into a limited number of arguments.
See the variables and state sample for further guidance on tracking and using state.
"},{"location":"developers/usage-best-practices/#limit-uses-of-dynamic-shapes","title":"Limit uses of dynamic shapes","text":"While IREE aims to support general dynamic shapes use, it is better able to optimize parts of programs where shapes are static. Slow varying dimensions like batch index or timestamp are safer uses of dynamic shapes than faster varying dimensions like the x/y/channel dimensions of images.
See the dynamic shapes sample for further guidance on using dynamic shapes.
"},{"location":"developers/usage-best-practices/#practices-for-compilation-settings","title":"Practices for compilation settings","text":"TODO: which compiler targets to use (try both CUDA and Vulkan?)
TODO: use the most specific LLVM target triple you can?
"},{"location":"developers/usage-best-practices/#tuning-compilation-heuristics","title":"Tuning compilation heuristics","text":"IREE runs its own suite of benchmarks continuously using the definitions at https://github.com/openxla/iree/tree/main/benchmarks. The flags set for these benchmarks represent the latest manually tuned values for workloads we track closely and referencing them may help with your own search for peak performance. You can use these flags in your own explorations, but note that as compiler performance matures, the existing flags will gradually be replaced with attributes for autotuning or command line options for experimental features.
"},{"location":"developers/usage-best-practices/#practices-for-runtime-use","title":"Practices for runtime use","text":"TODO: sample code, profile numbers
"},{"location":"developers/usage-best-practices/#tuning-runtime-settings","title":"Tuning runtime settings","text":"When running on the CPU, the task system flags specified in iree/task/api.c give control over how worker threads will be created. For example, the --task_topology_group_count=3
flag can be set to explicitly run on three workers rather than rely on heuristic selection that defaults to one worker per detected physical core.
If running on a single thread or system with no threading support the local-sync
HAL driver can be used instead of the multithreaded local-task
HAL driver to reduce dependencies and code size. When running with the local-sync
driver all execution happens inline on the thread invoking the IREE runtime and will block until it has completed.
"},{"location":"developers/usage-best-practices/#do-the-minimum-amount-of-work-cache-queries-and-reuse-buffers","title":"Do the minimum amount of work: cache queries and reuse buffers","text":"When using IREE's runtime libraries, try to front-load queries, particularly queries using strings that look up into maps like iree_runtime_session_call_by_name
, so that hot sections of code are doing the minimum amount of work: routing inputs through buffers, scheduling runtime calls, and routing outputs through other buffers.
"},{"location":"developers/vulkan-environment-setup/","title":"Vulkan environment setup","text":"Vulkan is a new generation graphics and compute API that provides high-efficiency, cross-platform access to modern GPUs used in a wide variety of devices from PCs and consoles to mobile phones and embedded platforms.
This page lists steps and tips for setting up and troubleshooting a Vulkan development environment. The information here is meant to be generic.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-architecture","title":"Vulkan architecture","text":"Vulkan adopts a layered architecture, which aims to better support extensiblity. There are four components involved in this architecture:
- The Vulkan Application
- The Vulkan Loader
- Vulkan Layers
- Installable Client Drivers (ICDs)
The Vulkan loader sits between the Vulkan application, which calls Vulkan APIs, and the ICDs, which implement these Vulkan APIs. Vulkan layers augment the Vulkan system to provide optional features like validation and debugging. The Vulkan loader composes a chain of requested layers, which processes the Vulkan application's API calls one by one, and finally redirects the API calls made by the Vulkan application to one or more ICDs.
It's highly recommended to read the Architecture of the Vulkan Loader Interfaces Overview to get a general understanding of what these components are and how they interact with one another.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-setup_1","title":"Vulkan environment setup","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#windows","title":"Windows","text":"You need to install the Vulkan SDK from LunarG to get the Vulkan loader.
Typically the Vulkan SDK will be installed at C:\\VulkanSDK\\<version>\\
and there will be an environment variable VULKAN_SDK
pointing to it. You can run the vulkancube
executable under the Bin\\
subdirectory of the Vulkan SDK to make sure everything works properly. If not, you probably need to check whether the graphics card is Vulkan capable or update the driver.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#debianubuntu","title":"Debian/Ubuntu","text":"For Ubuntu 20.04/22.04, it's recommended to directly install the full Vulkan SDK from LunarG's APT sources for the loader and various developer tools.
If you want to have a minimal environment, the following packages should be installed for a proper Vulkan runtime:
libvulkan1
for the Vulkan loader libvulkan.so
. - For AMD GPUs, you can install
mesa-vulkan-drivers
for the Mesa AMD Vulkan ICD, or - AMD's official Vulkan ICD.
- For NVIDIA GPUs, you can install
nvidia-vulkan-icd
on Debian for NVIDIA Vulkan ICD. - the most recent
nvidia-driver-*
package on Ubuntu for NVIDIA Vulkan ICD.
The above packages provide the Vulkan loader and ICDs. With them a Vulkan application should be able to run. You may additionally want to install
- vulkan-tools for command-line tools like
vulkaninfo
(dumping available ICDs and their capabilities) and GUI application like vulkancube
(rendering a rotating cube).
In order to develop Vulkan applications, you additionally need the following packages:
- libvulkan-dev for various Vulkan header files.
- vulkan-validationlayers for Vulkan validation layers like
VkLayer_standard_validation
.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#linux","title":"Linux","text":"For other Linux distros, please consult the corresponding package management tools for the packages needed. (And please feel free to update this doc regarding them.)
You can also download and install the Vulkan SDK tarball from LunarG. It packages the loader with many useful layers and other shader tools.
You can also build the Vulkan SDK component projects like Vulkan-Loader and Vulkan-ValidationLayers from source. But note that building these components separately you need to make sure they are consistent with one another (e.g., using the same version of Vulkan headers) to function together.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#android","title":"Android","text":"Please make sure your Android device is Vulkan capable. Vulkan is supported on Android since 7, but we track newer Android versions (10+) closely and haven't set a clear min version yet.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#multiple-vulkan-sdks","title":"Multiple Vulkan SDKs","text":"If you have multiple versions of Vulkan loaders exist, you may also need to set LD_LIBRARY_PATH
and LD_PRELOAD
to load the desired version of the loader. For example:
LD_LIBRARY_PATH={PATH_TO_VULKAN_SDK}/x86_64/lib/\nLD_PRELOAD=libvulkan.so.1\n
This can also be done by sourcing the proper setup-env.sh
from one of the downloaded Vulkan SDKs.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-troubleshooting","title":"Vulkan environment troubleshooting","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#useful-environment-variables","title":"Useful environment variables","text":"There are a few environment variables that can alter the default Vulkan loader behavior and print verbose information, notably:
VK_LOADER_DEBUG
: enable loader debug messages. Setting it to all
will enable the most verbose logging from the loader. This is especially useful when trying to see what layers/ICDs are searched and used. VK_ICD_FILENAMES
: force the loader to use a specific ICD. This is especially useful when you have multiple Vulkan capable devices and want to select which one to use manually. VK_INSTANCE_LAYERS
: force the loader to enable the given layers. For example, You can force enable VK_LAYER_LUNARG_api_dump
to have a detailed dump of all Vulkan API calls made by the application. You can force enable VK_LAYER_LUNARG_core_validation
to validate the API calls made by the application. VK_LAYER_PATH
: override the loader's standard layer library search folders.
Please see the Vulkan loader's documentation for detailed explanation for these variables.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#setting-environment-variables-for-bazel-test","title":"Setting environment variables for Bazel test","text":"Bazel runs tests in a sandbox and environment variables must be passed through to the test runner. Consider putting environment setup in a user.bazelrc
to save typing. For example:
test --test_env=\"LD_LIBRARY_PATH=/absolute/path/to/vulkan/sdk/x86_64/lib/\"\ntest --test_env=\"LD_PRELOAD=libvulkan.so.1\"\ntest --test_env=\"VK_LAYER_PATH=/absolute/path/to/additional/layers/:$VK_LAYER_PATH\"\n
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-function-vkcreateinstance-not-available-on-android","title":"Vulkan function vkCreateInstance
not available on Android","text":"Since Android 8 Oreo, Android re-architected the OS framework with project Treble. Framework libraries and vendor libraries have a more strict and clear separation. Their dependencies are carefully scrutinized and only selected cases are allowed. This is enforced with linker namespaces.
/data/local/tmp
is the preferred directory for automating native binary tests built using NDK toolchain. They should be allowed to access libraries like libvulkan.so
for their functionality. However, there was an issue with fully treblized Android 10 where /data/local/tmp
did not have access to the linker namespaces needed by libvulkan.so
. This should be fixed now. But as typically in the Android system, it takes a long time to see the fix getting propagated, if ever.
A known workaround is to symlink the vendor Vulkan implementation under /vendor/lib[64]
as libvulkan.so
under /data/local/tmp
and use LD_LIBRARY_PATH=/data/local/tmp
when invoking IREE executables.
For Qualcomm Adreno GPUs, the vendor Vulkan implementation is at /vendor/lib[64]/hw/vulkan.*.so
. So for example for Snapdragon 865:
adb shell ln -s /vendor/lib64/hw/vulkan.kona.so /data/local/tmp/libvulkan.so\n
For ARM Mali GPUs, there is only one monolithic driver (/vendor/lib[64]/libGLES_mali.so
) for OpenGL and Vulkan and the Vulkan vendor driver (/vendor/lib[64]/hw/vulkan.*.so
) is just a symlink to it. So for example:
adb shell ln -s /vendor/lib64/libGLES_mali.so /data/local/tmp/libvulkan.so\n
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#ssh-on-linux-and-x-forwarding","title":"SSH on Linux and X forwarding","text":"Physical devices enumerated on NVIDIA drivers can be affected by the DISPLAY
environment variable. If you are running under an SSH session to Linux or using chrome remote desktop and have problems with physical device enumeration, you probably want to check the DISPLAY
environment and set it to point to a display at the server side, for example:
export DISPLAY=:0\n
","tags":["GPU","Vulkan"]},{"location":"developers/building/bazel/","title":"Building with Bazel","text":"This page walks through building IREE from source using the Bazel build system.
Warning
Bazel build support is primarily for internal project infrastructure. We strongly recommend using CMake instead.
Our Bazel configuration is also only tested on Linux. Windows and macOS may be unstable.
"},{"location":"developers/building/bazel/#prerequisites","title":"Prerequisites","text":"Linux macOS Windows -
Install Bazel, matching IREE's .bazelversion
by following the official docs.
-
Install a compiler such as Clang (GCC is not fully supported).
sudo apt install clang\n
Set environment variables for Bazel:
export CC=clang\nexport CXX=clang++\n
-
Install Python build requirements:
python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
-
Install Homebrew:
/bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)\"\n
-
Install Bazel, matching IREE's .bazelversion
by following the official docs or via Homebrew:
brew install bazel\n
-
Install Python build requirements:
python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
Tip
You can simplify installation by using a package manager like Scoop or Chocolatey.
-
Install Bazel, matching IREE's .bazelversion
by following the official docs.
Also install MSYS2 by following Bazel's documentation.
-
Install Python3 (docs here) and Python build requirements:
python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
-
Install the full Visual Studio or \"Build Tools For Visual Studio\" from the downloads page then set the BAZEL_VS
environment variable:
> $env:BAZEL_VS = \"C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\"\n
"},{"location":"developers/building/bazel/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":""},{"location":"developers/building/bazel/#clone","title":"Clone","text":"Use Git to clone the IREE repository and initialize its submodules:
git clone https://github.com/openxla/iree.git\ncd iree\ngit submodule update --init\n
Configure Bazel:
# This generates a `configured.bazelrc` file by analyzing your environment.\n# Skipping this step will make it difficult to select your platform/compiler.\npython3 configure_bazel.py\n
Linux macOS Windows (No Linux-specific tips for configuring)
(No macOS-specific tips for configuring)
Tip
Clone to a short path like C:\\projects\\
to avoid issues with Windows maximum path lengths (260 characters).
Tip
configure_bazel.py
only detects that you have Windows and will output the default --config=windows
to configured.bazelrc
, which assumes the latest version of MSVC. To avoid some warnings, you may want to replace it with (for example) --config=msvc2022
.
"},{"location":"developers/building/bazel/#build","title":"Build","text":"Run all core tests:
bazel test -k //...\n
Tip
You can add flags like --test_env=IREE_VULKAN_DISABLE=1
to your test command to change how/which tests run.
In general, build artifacts will be under the bazel-bin
directory at the top level.
"},{"location":"developers/building/bazel/#recommended-userbazelrc","title":"Recommended user.bazelrc
","text":"You can put a user.bazelrc at the root of the repository and it will be ignored by git.
Linux macOS Windows build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n
build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n
build --disk_cache=c:/bazelcache\nbuild:debug --compilation_mode=dbg --copt=/O2 --per_file_copt=iree@/Od --strip=never\n
"},{"location":"developers/building/bazel/#whats-next","title":"What's next?","text":""},{"location":"developers/building/bazel/#take-a-look-around","title":"Take a Look Around","text":"Build all of IREE's 'tools' directory:
bazel build tools/...\n
Check out what was built:
ls bazel-bin/tools/\n./bazel-bin/tools/iree-compile --help\n
Translate an MLIR file and execute a function in the compiled module:
# iree-run-mlir <compiler flags> [input.mlir] <runtime flags>\n$ ./bazel-bin/tools/iree-run-mlir \\\n--iree-hal-target-backends=vmvx --print-mlir \\\n./samples/models/simple_abs.mlir \\\n--input=f32=-2\n
"},{"location":"developers/building/cmake-options/","title":"CMake options","text":""},{"location":"developers/building/cmake-options/#frequently-used-cmake-options","title":"Frequently-used CMake options","text":""},{"location":"developers/building/cmake-options/#cmake_build_type","title":"CMAKE_BUILD_TYPE
","text":" - type: STRING
Sets the build type. Possible values are Release
, Debug
, RelWithDebInfo
and MinSizeRel
. If unset, build type is set to Release
.
"},{"location":"developers/building/cmake-options/#cmake_lang_compiler","title":"CMAKE_<LANG>_COMPILER
","text":" - type: STRING
This is the command that will be used as the <LANG>
compiler, which are C
and CXX
in IREE. These variables are set to compile IREE with clang
or rather clang++
. Once set, these variables can not be changed.
"},{"location":"developers/building/cmake-options/#iree-specific-cmake-options","title":"IREE-specific CMake options","text":"This gives a brief explanation of IREE specific CMake options and variables.
"},{"location":"developers/building/cmake-options/#iree_enable_runtime_tracing","title":"IREE_ENABLE_RUNTIME_TRACING
","text":" - type: BOOL
Enables instrumented runtime tracing. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_enable_compiler_tracing","title":"IREE_ENABLE_COMPILER_TRACING
","text":" - type: BOOL
Enables instrumented compiler tracing. This requires that IREE_ENABLE_RUNTIME_TRACING
also be set. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_build_compiler","title":"IREE_BUILD_COMPILER
","text":" - type: BOOL
Builds the IREE compiler. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_tests","title":"IREE_BUILD_TESTS
","text":" - type: BOOL
Builds IREE unit tests. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_docs","title":"IREE_BUILD_DOCS
","text":" - type: BOOL
Builds IREE documentation files. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_build_samples","title":"IREE_BUILD_SAMPLES
","text":" - type: BOOL
Builds IREE sample projects. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_python_bindings","title":"IREE_BUILD_PYTHON_BINDINGS
","text":" - type: BOOL
Builds the IREE python bindings. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite","title":"IREE_BUILD_BINDINGS_TFLITE
","text":" - type: BOOL
Builds the IREE TFLite C API compatibility shim. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite_java","title":"IREE_BUILD_BINDINGS_TFLITE_JAVA
","text":" - type: BOOL
Builds the IREE TFLite Java bindings with the C API compatibility shim. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_experimental_remoting","title":"IREE_BUILD_EXPERIMENTAL_REMOTING
","text":" - type: BOOL
Builds experimental remoting component. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_hal_driver_defaults","title":"IREE_HAL_DRIVER_DEFAULTS
","text":" - type: BOOL
Default setting for each IREE_HAL_DRIVER_*
option.
"},{"location":"developers/building/cmake-options/#iree_hal_driver_","title":"IREE_HAL_DRIVER_*
","text":" - type: BOOL
Individual options enabling the build for each runtime HAL driver.
"},{"location":"developers/building/cmake-options/#iree_target_backend_defaults","title":"IREE_TARGET_BACKEND_DEFAULTS
","text":" - type: BOOL
Default setting for each IREE_TARGET_BACKEND_*
option.
"},{"location":"developers/building/cmake-options/#iree_target_backend_","title":"IREE_TARGET_BACKEND_*
","text":" - type: BOOL
Individual options enabling the build for each compiler target backend.
"},{"location":"developers/building/cmake-options/#iree_input_","title":"IREE_INPUT_*
","text":" - type: BOOL
Individual options enabling each set of input dialects.
"},{"location":"developers/building/cmake-options/#iree_output_format_c","title":"IREE_OUTPUT_FORMAT_C
","text":" - type: BOOL
Enables the vm-c compiler output format, using MLIR EmitC. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_dev_mode","title":"IREE_DEV_MODE
","text":" - type: BOOL
Configure settings to optimize for IREE development (as opposed to CI or release). Defaults to OFF
. For example, this will downgrade some compiler diagnostics from errors to warnings.
"},{"location":"developers/building/cmake-options/#iree_enable_lld","title":"IREE_ENABLE_LLD
","text":" - type: BOOL
Use lld when linking. Defaults to OFF
. This option is equivalent to -DIREE_USE_LINKER=lld
. The option IREE_ENABLE_LLD
and IREE_USE_LINKER
can not be set at the same time.
"},{"location":"developers/building/cmake-options/#iree_enable_asan","title":"IREE_ENABLE_ASAN
","text":" - type: BOOL
Enable address sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#iree_enable_msan","title":"IREE_ENABLE_MSAN
","text":" - type: BOOL
Enable memory sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#iree_enable_tsan","title":"IREE_ENABLE_TSAN
","text":" - type: BOOL
Enable thread sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#iree_enable_ubsan","title":"IREE_ENABLE_UBSAN
","text":" - type: BOOL
Enable undefined behavior sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#cross-compilation","title":"Cross-compilation","text":"When cross compiling (using a toolchain file like android.toolchain.cmake
), first build and install IREE's tools for your host configuration, then use the IREE_HOST_BIN_DIR
CMake option to point the cross compiled build at the host tools.
"},{"location":"developers/building/cmake-with-ccache/","title":"CMake with ccache
","text":"ccache
is a compilation cache. In principle, just prepending compiler invocations with ccache
is all one needs to enable it, e.g.
ccache clang foo.c -c -o foo.o\n
takes care of executing clang
with these arguments and caches the output file foo.o
. The next invocation then skips executing clang
altogether.
When the cache is hit, the speedup is such that the \"compilation\" becomes essentially free. However, ccache
only caches compilation, not linking.
Here are a few scenarios where ccache
helps:
- Incremental rebuilds. While
cmake
always tries to avoid unnecessary work in incremental rebuilds, it can only make simple decisions based on file timestamps. ccache
sees deeper: if the raw source code isn't readily a cache hit, it will then try again after preprocessing and discarding comments. - One pain point with
cmake
is having to start over from a clean build directory from time to time, which by default means paying again the full cost of a cold build. Thankfully ccache
keeps its cache outside of any cmake
build directory, so the first build in the new clean build directory may be very fast.
"},{"location":"developers/building/cmake-with-ccache/#installing-and-setting-up-ccache","title":"Installing and setting up ccache
","text":"ccache
is available on most platforms. On Debian-based Linux distributions, do:
sudo apt install ccache\n
The one ccache
setting that you probably need to configure is the maximum cache size. The default 5G
is too small for our purposes. To set the cache max size, do this once:
ccache --max-size=20G\n
Tip: At the moment (late 2020), most of the code we're building is third_party/llvm-project
so the fundamental limiting factor to how far we can cache away rebuilds is how often that dependency gets updated. Given how frequently it currently is updated, I'm finding that 20G
is enough to make the ccache
size not be the limiting factor.
"},{"location":"developers/building/cmake-with-ccache/#telling-cmake-to-use-ccache","title":"Telling CMake to use ccache
","text":"Use the CMake COMPILER_LAUNCHER functionality by setting CMAKE_C_COMPILER_LAUNCHER=ccache
and CMAKE_CXX_COMPILER_LAUNCHER=ccache
in your CMake invocation.
Notes:
- This approach only works with the
Ninja
and Makefile
generators (cmake -G
flag). When using other generators, another approach is needed, based on wrapping the compiler in a script that prepends ccache
. See this article.
"},{"location":"developers/building/cmake-with-ccache/#ensuring-that-ccache-is-used-and-monitoring-cache-hits","title":"Ensuring that ccache
is used and monitoring cache hits","text":"The ccache -s
command dumps statistics, including a cache hit count and ratio. It's convenient to run periodically with watch
in a separate terminal:
watch -n 0.1 ccache -s # update the stats readout every 0.1 seconds\n
"},{"location":"developers/building/emscripten/","title":"Building with Emscripten","text":"Emscripten is a complete compiler toolchain to WebAssembly, using LLVM, with a special focus on speed, size, and the Web platform. Emscripten can be used to compile parts of IREE to WebAssembly for execution within web browsers or other Wasm runtimes.
","tags":["Web"]},{"location":"developers/building/emscripten/#status","title":"Status","text":"IREE's runtime can be compiled through Emscripten in some limited configurations. More of the runtime will be supported over time.
IREE's compiler can be compiled through Emscripten with local changes. More work is needed for this to be generally supported.
","tags":["Web"]},{"location":"developers/building/emscripten/#prerequisites","title":"Prerequisites","text":"Read https://emscripten.org/docs/getting_started/downloads.html and run
./emsdk install latest\n./emsdk activate latest\nsource ./emsdk_env.sh\n
","tags":["Web"]},{"location":"developers/building/emscripten/#building-irees-runtime-with-emscripten","title":"Building IREE's runtime with Emscripten","text":"","tags":["Web"]},{"location":"developers/building/emscripten/#host-configuration","title":"Host configuration","text":"Build and install at least the compiler tools on your host machine, or install them from a binary distribution:
$ cmake -G Ninja -B ../iree-build-host/ \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DCMAKE_INSTALL_PREFIX=../iree-build-host/install \\\n.\n$ cmake --build ../iree-build-host/ --target install\n
","tags":["Web"]},{"location":"developers/building/emscripten/#target-configuration","title":"Target configuration","text":"$ emcmake cmake -G Ninja -B ../iree-build-emscripten/ \\\n-DCMAKE_BUILD_TYPE=Release \\\n-DIREE_HOST_BIN_DIR=$(realpath ../iree-build-host/install/bin) \\\n-DIREE_BUILD_TESTS=OFF \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\n
Build:
cmake --build ../iree-build-emscripten/ \\\n--target iree_samples_simple_embedding_simple_embedding_vmvx_sync\n
","tags":["Web"]},{"location":"developers/building/emscripten/#load-into-a-webassembly-environment","title":"Load into a WebAssembly environment","text":"Copy the outputs from the build process (e.g. simple_embedding_vmvx_sync.js
and simple_embedding_vmvx_sync.wasm
) into your application and follow instructions at either https://webassembly.org/getting-started/developers-guide/ or https://developer.mozilla.org/en-US/docs/WebAssembly/Loading_and_running.
","tags":["Web"]},{"location":"developers/debugging/android-with-lldb/","title":"Android LLDB debugging","text":"This doc shows how to use LLDB to debug native binaries on Android. For a more complete explanation, see the official LLDB documentation on remote debugging.
","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#prerequisites","title":"Prerequisites","text":"We assume the following setup:
- Android NDK is installed and the
ANDROID_NDK
environment variable is set to the installation path. - Your Android device connected and configured for
adb
. - The Android binary of interest is already compiled and the command to run it (in
adb shell
) is <your-binary> [program args...]
. This does not have to be a proper Android app with a manifest, etc.
","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#running-manually","title":"Running Manually","text":" -
Push the toolchain files, including lldb-server
, to your device:
adb shell \"mkdir -p /data/local/tmp/tools\"\nadb push \"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/aarch64/* /data/local/tmp/tools\n
You may need to adjust the clang toolchain version to match the one in your NDK. You can find it with find \"$ANDROID_NDK/toolchains/llvm/prebuilt\" -name lldb-server
.
-
Set up port forwarding. We are going to use port 5039 but you are free to pick a different one:
adb forward tcp:5039 tcp:5039\n
-
Start an lldb-server
in a new interactive adb shell:
adb shell\n/data/local/tmp/tools/lldb-server platform --listen '*:5039' --server\n
-
Launch lldb
, connect to the server and run the binary:
lldb -o 'platform select remote-android' \\\n-o 'platform connect connect://:5039' \\\n-o 'platform shell cd /data/local/tmp'\ntarget create <your-binary>\nrun [program args...]\n
You can either use the system lldb
or a prebuilt under \"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/<your-host-arch>
.
Explanation: each -o
(short for --one-shot
) tells lldb to execute a command on startup. You can run those manually in the lldb shell, if you prefer. Then, we tell lldb which working directory to use, where to find the executable, and what command line arguments to use.
","tags":["Android"]},{"location":"developers/debugging/compile-time-regressions/","title":"Compile time regression debugging","text":"So the IREE compiler used to compile a program quickly, but it is now slower. What do you do?
"},{"location":"developers/debugging/compile-time-regressions/#initial-information-gathering","title":"Initial information gathering","text":"Try to answer as many of these questions as you can:
When did compilation get slower?
A specific git commit is ideal, but \"sometime in the last week\" is a good starting point. You'll ultimately want to find a culprit release or git commit that changed the compiler code.
How much slower did compilation get?
Be specific - did it jump from 1 minute to 2 minutes, or 1 minute to 1 hour? Identifying the scale of the regression can help set the priority to investigate it.
What is the full compile command?
Try to extract the input program and full list of flags passed to the compiler binary so that others can reproduce what you're seeing. Try to distill this as much as possible to using just native tools (no Python or other framework layers).
What environment is the compiler running in?
Are you using a Debug
build, or a release build? What operating system and size machine is running the compiler (e.g. Linux developer machine, or a smaller system)?
"},{"location":"developers/debugging/compile-time-regressions/#culprit-finding-and-bisecting","title":"Culprit finding and bisecting","text":"If you only have a rough idea of when something changed and want to narrow that down to a specific code change, bisecting can help.
"},{"location":"developers/debugging/compile-time-regressions/#running-git-bisect","title":"Running git bisect
","text":"Building the compiler from source and using git bisect
will let you pinpoint specific commits in IREE, though it typically won't let you step through changes in submodules (e.g. MLIR updates in third_party/llvm-project/
).
Tip: Configure ccache if you'll be rebuilding the compiler while bisecting
A manual workflow with git bisect
looks like this:
git bisect start --first-parent\ngit bisect good [<rev>]\ngit bisect bad [<rev>]\n\n# Read the prompts from the command as it runs\n# At each step, test the compiler:\n# git submodule update\n# cmake --build build/ --target iree-compile\n# ./build/tools/iree-compile <args>\n# attach Tracy, observe timing, print IR, etc. to determine if fast or slow\n# if fast, `git bisect good`\n# if slow, `git bisect bad`\n# repeat\n
An automated workflow can use git bisect run
and a script:
# run_bisect.sh\ngit submodule update\ncmake --build build/ --target iree-compile\n# Other logic here\n
git bisect start --first-parent\ngit bisect good [<rev>]\ngit bisect bad [<rev>]\ngit bisect run run_bisect.sh\n
"},{"location":"developers/debugging/compile-time-regressions/#sample-compile-executable-sources-individually-with-a-timeout","title":"Sample: compile executable sources individually with a timeout","text":"#!/bin/bash\n\nset -xeuo pipefail\n\n# --------------------------------------------------------------------------- #\n# Settings #\n# --------------------------------------------------------------------------- #\n\nINPUT_FILE_PATH=\"/path/to/program.mlirbc\"\nTMP_DIR=\"../iree-tmp\"\n\ndeclare -a COMPILER_FLAGS=(\n\"--iree-input-type=stablehlo\"\n\"--iree-hal-target-backends=cuda\"\n\"--iree-hal-cuda-llvm-target-arch=sm_80\"\n)\n\nTIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE=10\n\n# --------------------------------------------------------------------------- #\n# Utility functions #\n# --------------------------------------------------------------------------- #\n\n# Call to have `git bisect` skip this commit (don't mark as good _or_ bad)\n# https://git-scm.com/docs/git-bisect#_bisect_run\nskip_on_error() {\n>&2 echo \"** Skipping due to error: $1 **\"\nexit 125 # Special exit code for `git bisect skip`\n}\n\n# --------------------------------------------------------------------------- #\n# Main script #\n# --------------------------------------------------------------------------- #\n\n# Store git version hash, so we can dump artifacts to unique directories later.\nGIT_SHA=\"$(git rev-parse --short HEAD)\"\n\necho \"** Building iree-compile at ${GIT_SHA} **\"\n\n# The `git bisect` command only checks out a commit, so update submodules.\ngit submodule update\n\n# Build the compiler. 
You'll want ccache configured to make this fast!\ncmake --build ../iree-build/ --target iree-compile || skip_on_error \"CMake build failed\"\n\n# Run the compiler, dumping executable sources and stopping.\nSOURCES_DIR=\"${TMP_DIR}/sources-${GIT_SHA}\"\necho \"** Running iree-compile at ${GIT_SHA}, dumping sources to ${SOURCES_DIR} **\"\n../iree-build/tools/iree-compile \\\n${INPUT_FILE_PATH} \\\n${COMPILER_FLAGS[@]} \\\n--iree-hal-dump-executable-sources-to=${SOURCES_DIR} \\\n--compile-to=executable-sources \\\n-o /dev/null\n\n# Run the compiler again on each executable individually.\necho \"** Running iree-compile at ${GIT_SHA} for each executable source **\"\nSOURCES=($(ls -1 ${SOURCES_DIR}))\nfor SOURCE in \"${SOURCES[@]}\"; do\necho \" * Compiling: ${SOURCE} *\"\ntimeout --verbose ${TIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE} \\\n../iree-build/tools/iree-compile ${SOURCES_DIR}/${SOURCE} \\\n${COMPILER_FLAGS[@]} \\\n--compile-mode=hal-executable \\\n-o /dev/null\ndone\n
"},{"location":"developers/debugging/compile-time-regressions/#profiling-and-tracing","title":"Profiling and tracing","text":"If you want to understand why the compiler is fast or slow, or if you want to compare performance in detail between two versions, consider these profiling options.
"},{"location":"developers/debugging/compile-time-regressions/#mlir-pass-timing","title":"MLIR pass timing","text":"The -mlir-timing
flag enables Pass Timing instrumentation. Once the compiler finishes running, this prints a report like
===-------------------------------------------------------------------------===\n... Pass execution timing report ...\n===-------------------------------------------------------------------------===\nTotal Execution Time: 0.0203 seconds\n\n ---Wall Time--- --- Name ---\n 0.0047 ( 55.9%) Canonicalizer\n 0.0019 ( 22.2%) VerifierPass\n 0.0016 ( 18.5%) LLVMLoweringPass\n 0.0003 ( 3.4%) CSE\n 0.0002 ( 1.9%) (A) DominanceInfo\n 0.0084 (100.0%) Total\n
This is easy data to collect, especially remotely over SSH, but it might not paint a complete picture and requires waiting for compilation to finish.
"},{"location":"developers/debugging/compile-time-regressions/#using-tracy","title":"Using Tracy","text":"See our documentation on profiling with Tracy. For compile time regressions, pay particular attention to the different compilation phases (Flow/Stream/HAL), how many times TranslateExecutablesPass
runs, and if there are outlier passes that take significantly longer to run than others.
Here are some previous analyses for inspiration:
- https://github.com/openxla/iree/issues/12033
- https://github.com/openxla/iree/issues/12035
- https://github.com/openxla/iree/issues/12183
- https://github.com/openxla/iree/issues/13189
Example slow trace:
Example fast trace:
Example sampling statistics showing 10s of minutes in LLVM codegen:
"},{"location":"developers/debugging/compile-time-regressions/#stepping-through-compiler-ir","title":"Stepping through compiler IR","text":"Debugging an MLIR-based compiler like IREE usually involves reading IR at some point. For compile time regressions, it helps to snapshot the IR at a few key phases and look for differences between fast compilation and slow compilation.
Here is one useful flag combination:
--mlir-disable-threading \\\n--mlir-elide-elementsattrs-if-larger=8 \\\n--mlir-print-ir-after=iree-hal-materialize-interfaces\n
"},{"location":"developers/debugging/integration-tests/","title":"Integration test debugging","text":"This document includes tips for triaging integration test correctness issues. Feel free to reach out to @hanhanW or ask questions on Discord for more help.
"},{"location":"developers/debugging/integration-tests/#general-tips","title":"General tips","text":""},{"location":"developers/debugging/integration-tests/#narrow-down-reproducers","title":"Narrow down reproducers","text":" - Models themselves can be large, and IREE breaks models into dispatches/kernels and then launches those individually. Program outputs could diverge starting from any individual launch. To get a smaller reproducer, you can use --iree-flow-trace-dispatch-tensors.
- You can compare the logs between builds/backends to get an idea about which dispatch results in wrong outputs. The dumped inputs can be reused in a flagfile.
Once a suspicious dispatch is identified, we can create a test case based on the dispatch function. The dispatch function can be derived after the OutlineDispatchRegions
pass. The function signatures have to be modified manually. You'll have to put flow.dispatch.tensor.load
variables to function arguments, and replace flow.dispatch.tensor.store
with return
op.
Note: This only works when dispatch formation logics are identical between runs.
"},{"location":"developers/debugging/integration-tests/#iree-samples-repository-tests","title":"iree-samples repository tests","text":"Follow README to run the model. The MLIR files will be generated. You'll find the saved file from log. E.g.,
[ RUN ] MobilenetV2Int8Test.test_compile_tflite\nI0401 17:27:04.084272 140182373025024 test_util.py:119] Setting up for IREE\nI0401 17:27:04.085064 140182373025024 binaries.py:218] Invoke IREE Pipeline:\n /tmp/iree-samples/iree-samples.venv/lib/python3.9/site-packages/iree/tools/tflite/iree-import-tflite\n /tmp/iree-samples/tflitehub/tmp/mobilenet_v2_int8_test.py/model.tflite\n --mlir-print-debuginfo\n --save-temp-tfl-input=/tmp/iree-samples/tflitehub/tmp/mobilenet_v2_int8_test.py/tflite.mlir\n --save-temp-iree-input=/tmp/iree-samples/tflitehub/tmp/mobilenet_v2_int8_test.py/tosa.mlir\n
Unfortunately, the artifacts are not dumped in the runs. There is an issue for tracking this. A workaround can be found in the issue.
"},{"location":"developers/debugging/integration-tests/#tensorflow-integration-tests","title":"TensorFlow integration tests","text":"These are steps to reproduce/address failures in TF/TFLite integration tests. These instructions are most stable on Linux, though they may work with a few tweaks on Windows and macOS.
All steps here assume starting from the IREE root directory.
-
First create a Python virtual environment to install packages into:
python -m venv iree-tf.venv\nsource iree-tf.venv/bin/activate\n\n# Install test requirements\npython -m pip install -r ./integrations/tensorflow/test/requirements.txt\n
-
Install IREE's tools and Python bindings or build them from source
Install distributed packages
# Install packages from nightly releases\n# This should work for most cases, as the importers change infrequently\npython -m pip install \\\niree-compiler iree-runtime iree-tools-tf iree-tools-tflite \\\n--find-links https://iree.dev/pip-release-links.html\n
OR build from source
# Build Python bindings from source\ncmake -G Ninja -B ../iree-build/ -DIREE_BUILD_PYTHON_BINDINGS=ON .\ncmake --build ../iree-build/\n\n# Add IREE built-from-source Python packages to PYTHONPATH\nsource .env\n\n# Install IREE TF/TFLite Python packages\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n
-
Run the python test command line
The command can be obtained from the run file. For example, if iree_tfl_tests/llvmcpu_posenet_i8.run
failed,
cd integrations/tensorflow/test/\ncat iree_tfl_tests/llvmcpu_posenet_i8.run\n\n# REQUIRES: llvmcpu\n# RUN: %PYTHON -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=%t\n\ncd python/\npython -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=/tmp/posenet_i8_failure\n
Note that the command can only be run under integrations/tensorflow/test/python
directory.
-
Extract intermediate files and use with native tools
The test will create an iree_input.mlir
in the temp directory specified. Those can then be fed into iree-compile
(built locally to reproduce the error)
iree-compile \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-input-type=stablehlo \\\niree_input.mlir\n
"},{"location":"developers/debugging/releases/","title":"Release debugging playbook","text":""},{"location":"developers/debugging/releases/#tools-and-locations","title":"Tools and Locations","text":" .github/workflows/build_package.yml
: Release packaging jobs build_tools/github_actions/build_dist.py
: Main script to build various release packages (for all platforms). We usually use this when reproing to approximate exactly what the CI does. Assumes a subdirectory of c
and writes builds to iree-build
and iree-install
as a peer of it. To use locally, just symlink your source dir as c
in an empty directory (versus checking out).
"},{"location":"developers/debugging/releases/#mapping-releases-back-to-git-commits","title":"Mapping releases back to git commits","text":"The source IREE commit SHA is embedded into pip releases in a few places. Starting in a python venv, you can find the IREE commit from both the shell:
\"$(find . -name 'iree-compile' -executable)\" --version\nIREE (https://iree.dev):\n IREE compiler version 20231016.553 @ f1cb2692a086738d7f16274b9b3af6d2c15ef133\n LLVM version 18.0.0git\n Optimized build\n
and the Python API:
python -c \"import iree.compiler.version as v; print(v.REVISIONS['IREE'])\"\nf1cb2692a086738d7f16274b9b3af6d2c15ef133\n
"},{"location":"developers/debugging/releases/#manylinux-releases","title":"Manylinux releases","text":"The Linux releases are done in a manylinux2014 docker container. At the time of this writing, it has gcc 9.3.1 and Python versions 3.5 - 3.9 under /opt/python
. Note that this docker image approximates a 2014 era RHEL distro, patched with backported (newer) dev packages. It builds with gcc and BFD linker unless if you arrange otherwise. yum
can be used to get some packages.
Get a docker shell (see exact docker image in build_package.yml workflow):
docker run --rm -it -v $(pwd):/work/c stellaraccident/manylinux2014_x86_64-bazel-4.2.2:latest /bin/bash\n
Remember that docker runs as root unless if you take steps otherwise. Don't touch write files in the /work/c
directory to avoid scattering root owned files on your workstation.
The default system Python is 2.x, so you must select one of the more modern ones:
export PATH=/opt/python/cp39-cp39/bin:$PATH\n
Build core installation:
# (from within docker)\ncd /work\npython ./c/build_tools/github_actions/build_dist.py main-dist\n\n# Also supports:\n# main-dist\n# py-runtime-pkg\n# py-xla-compiler-tools-pkg\n# py-tflite-compiler-tools-pkg\n# py-tf-compiler-tools-pkg\n
You can git bisect
on the host and keep running the above in the docker container. Note that every time you run build_dist.py
, it deletes the cmake cache but otherwise leaves the build directory (so it pays the configure cost but is otherwise incremental). You can just cd iree-build
and run ninja
for faster iteration (after the first build or if changing cmake flags). Example:
Extended debugging in the manylinux container:
cd /work/iree-build\n# If doing extended debugging in the container, these may make you happier.\nyum install ccache devtoolset-9-libasan-devel gdb\n\n# Get an LLVM symbolizer.\nyum install llvm9.0\nln -s /usr/bin/llvm-symbolizer-9.0 /usr/bin/llvm-symbolizer\n\n# You can manipulate cmake flags. These may get you a better debug experience.\ncmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DIREE_ENABLE_ASAN=ON -DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=gold -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .\n\nninja\n\n# Or you may need this if buggy LLVM tools (like mlir-tblgen) are leaking :(\nASAN_OPTIONS=\"detect_leaks=0\" ninja\n
Other tips:
- If debugging the runtime, you may have a better time just building the Release mode
main-dist
package above once, which will drop binaries in the iree-install
directory. Then build the py-runtime-pkg
or equiv and iterate further in the build directory. Ditto for TF/XLA/etc.
"},{"location":"developers/debugging/releases/#testing-releases-on-your-fork","title":"Testing releases on your fork","text":"To avoid interrupting the regular releases published on the IREE github, you can test any changes to the release process on your own fork. Some setup is required before these github actions will work on your fork and development branch.
You can run schedule_candidate_release.yml
with a workflow dispatch from the actions tab. If you want to test using a commit other than the latest green on your main
branch, modify the section that identifies the latest green commit to search from another commit or just hardcode one.
To speed up build_package.yml
, you may want to comment out some of the builds here. The py-pure-pkgs
build takes only ~2 minutes and the py-runtime-pkg
build takes ~5, while the others can take several hours.
From your development branch, you can manually run the Schedule Snapshot Release action, which invokes the Build Release Packages action, which finally invokes the Validate and Publish Release action. If you already have a draft release and know the release id, package version, and run ID from a previous Build Release Packages run, you can also manually run just the Validate and Publish Release action.
"},{"location":"developers/debugging/sanitizers/","title":"Sanitizers (ASan/MSan/TSan)","text":"AddressSanitizer, MemorySanitizer and ThreadSanitizer are tools provided by clang
to detect certain classes of errors in C/C++ programs. They consist of compiler instrumentation (so your program's executable code is modified) and runtime libraries (so e.g. the malloc
function may get replaced).
They are abbreviated as \"ASan\", \"MSan\" and \"TSan\" respectively.
They all incur large overhead, so only enable them while debugging.
Tool Detects Helps debug what? Slowdown Memory overhead Android support ASan Out-of-bounds accesses, use-after-free, use-after-return, memory leaks Crashes, non-deterministic results, memory leaks 2x 3x Yes MSan Uninitialized memory reads Non-deterministic results 3x ? Yes TSan Data races Many bugs in multi-thread code 5x-15x 5x-10x No Note
See this documentation on leak detection. It is only enabled by default on some platforms.
"},{"location":"developers/debugging/sanitizers/#support-status-and-how-to-enable-each-sanitizer","title":"Support status and how to enable each sanitizer","text":""},{"location":"developers/debugging/sanitizers/#asan-addresssanitizer","title":"ASan (AddressSanitizer)","text":"Enabling ASan in the IREE build is a simple matter of setting the IREE_ENABLE_ASAN
CMake option:
cmake -DIREE_ENABLE_ASAN=ON ...\n
"},{"location":"developers/debugging/sanitizers/#tsan-threadsanitizer","title":"TSan (ThreadSanitizer)","text":"To enable TSan, at the moment, the following 3 CMake options must be set:
cmake \\\n-DIREE_ENABLE_TSAN=ON \\\n-DIREE_BYTECODE_MODULE_ENABLE_TSAN=ON \\\n-DIREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER=ON \\\n-DIREE_BUILD_SAMPLES=OFF \\\n...\n
In practice, IREE_ENABLE_TSAN
alone would be enough for many targets, but not all. The problem is that an IREE runtime built with IREE_ENABLE_TSAN
cannot load an IREE-compiled LLVM/CPU module unless the following flags were passed to the IREE compiler: --iree-llvmcpu-sanitize=thread
and --iree-llvmcpu-link-embedded=false
.
The CMake options IREE_BYTECODE_MODULE_ENABLE_TSAN
and IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER
ensure that the above flags are passed to the IREE compiler when building modules used in tests, benchmarks, etc. (anything that internally uses the CMake iree_bytecode_module
macro).
The CMake option IREE_BUILD_SAMPLES=OFF
is needed because samples currently assume that the embedded linker is used, so they are incompatible with IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER=ON
.
At the moment, CMake logic heavy-handedly enforces that whenever IREE_ENABLE_TSAN
is set, these other two CMake variables are also set. That ensures that all tests succeed: no test is expected to fail with TSan.
If you know what you're doing (i.e. if you are not building targets that internally involve an LLVM/CPU iree_bytecode_module
), feel free to locally comment out the CMake error and only set IREE_ENABLE_TSAN
. Also see a past attempt to relax that CMake validation.
"},{"location":"developers/debugging/sanitizers/#msan-memorysanitizer","title":"MSan (MemorySanitizer)","text":"In theory that should be a simple matter of
-DIREE_ENABLE_MSAN=ON\n
However, that requires making and using a custom build of libc++ with MSan as explained in this documentation.
As of April 2022, all of IREE's tests succeeded with MSan on Linux/x86-64, provided that the vulkan
driver was disabled (due to lack of MSan instrumentation in the NVIDIA Vulkan driver).
"},{"location":"developers/debugging/sanitizers/#ubsan-undefinedbehaviorsanitizer","title":"UBSan (UndefinedBehaviorSanitizer)","text":"Enabling UBSan in the IREE build is a simple matter of setting the IREE_ENABLE_UBSAN
CMake option:
cmake -DIREE_ENABLE_UBSAN=ON ...\n
Note that both ASan and UBSan can be enabled in the same build.
"},{"location":"developers/debugging/sanitizers/#symbolizing-the-reports","title":"Symbolizing the reports","text":""},{"location":"developers/debugging/sanitizers/#desktop-platforms","title":"Desktop platforms","text":"On desktop platforms, getting nicely symbolized reports is covered in this documentation. The gist of it is make sure that llvm-symbolizer
is in your PATH
, or make the ASAN_SYMBOLIZER_PATH
environment variable point to it.
"},{"location":"developers/debugging/sanitizers/#android","title":"Android","text":"On Android it's more complicated due to this Android NDK issue. Fortunately, we have a script to perform the symbolization. Copy the raw output from the sanitizer and feed it into the stdin
of the build_tools/scripts/android_symbolize.sh
script, with the ANDROID_NDK
environment variable pointing to the NDK root directory, like this:
ANDROID_NDK=~/android-ndk-r21d ./build_tools/scripts/android_symbolize.sh < /tmp/asan.txt\n
Where /tmp/asan.txt
is where you've pasted the raw sanitizer report.
Tip
This script will happily just echo any line that isn't a stack frame. That means you can feed it the whole ASan
report at once, and it will output a symbolized version of it. DO NOT run it on a single stack at a time! That is unlike the symbolizer tool that's being added in NDK r22, and one of the reasons why we prefer to keep our own script. For more details see this comment.
"},{"location":"developers/design-docs/cuda-backend/","title":"CUDA backend design","text":"Authored March, 2021
This document is intended to provide an overview of the design choices made to support CUDA within IREE. It describes both the HAL runtime and the NVVM codegen side.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#cuda-hal-driver","title":"CUDA HAL Driver","text":"The CUDA HAL driver is in iree/hal/drivers/cuda/
directory. It is written in C following the standards of the rest of the HAL module.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#cuda-library-dependency","title":"CUDA library dependency","text":"IREE calls directly into CUDA driver API
. CUDA library is loaded dynamically and cuda.h header from CUDA SDK is part of IREE third_party project. Therefore IREE doesn't require CUDA SDK to be installed when building iree tools.
At runtime HAL CUDA driver will load libcuda.so/nvcuda.dll library and load a subset of the cuda driver API used in HAL. The list of functions being used are in the file iree/hal/drivers/cuda/dynamic_symbols_tables.h
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#driver","title":"Driver","text":"There is no direct equivalent in CUDA to the HAL driver abstraction. We use it to hold the symbols loaded for all the devices.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#device","title":"Device","text":"The equivalent to HAL device in CUDA is the CUcontext
, it holds all the state related to memory allocations.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#command-buffer","title":"Command buffer","text":"We implement command buffers using CUDA Graph API
. Using the Graph API allows us to easily encode fine-grained dependencies between dispatches without having to create multiple streams.
Note that Graph API is meant to be used for command buffers that can be recorded once and used several times and there may be a performance penalty to using Graph API for direct command buffer. It is likely that we will also have a pure stream implementation in the future if we see performance problems with direct command buffer usages.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#event-and-barrier","title":"Event and Barrier","text":"In HAL Event and Barrier are used for GPU<->GPU synchronization either within a command buffer (Event and Barrier) or between command buffers.
The current implementation ignores events and barriers and serializes all the nodes of the graph in order to have a conservative but correct solution.
The design we plan for the future is to map dependencies within a command buffer to graph dependencies in the CUDA Graph API. When an event is signaled all the leaf nodes of the graph will be saved in HAL data structure and when the same command buffer waits on the signal we will add all the nodes as dependency to the future nodes added to the graph.
For simplicity we always serialize command buffers sent to the same command queue.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#allocator","title":"Allocator","text":"The allocator will forward allocation requests to cuMemHostAlloc
for host accessible memory and cuMemAlloc
for device only memory.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#buffer","title":"Buffer","text":"CUDA buffers are represented either as a host pointer or a device pointer of type CUdeviceptr
.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#executable","title":"Executable","text":"HAL executable maps naturally to a PTX module. The compiler will generate a flat buffer containing a PTX text module as well as a list of entry point function names and the workgroup size associated with those entry points.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#semaphore","title":"Semaphore","text":"Timeline semaphore is used in IREE to handle coarse-grained synchronization for CPU<->GPU, GPU<->GPU and CPU<->CPU. The interface closely follows the Vulkan timeline semaphore spec
.
There is currently no simple way to implement this on CUDA. There are several solutions discussed on this IREE issue
but no obvious solution. For now we force CPU and GPU to be synchronized after every submit to ensure correctness and ignore the semaphore.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#nvvm-codegen","title":"NVVM Codegen","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#nvvm-and-ptx","title":"NVVM and PTX","text":"NVVM is a CUDA specific IR composed of LLVM IR and NVVM specific intrinsics. It can be compiled to PTX text using LLVM PTX backend. NVVM has an associated dialect in MLIR that translates 1:1 to NVVM intrinsics. This is what we are using to generate the PTX kernel code.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#iree-flow","title":"IREE flow","text":"IREE's target independent codegen converts the compiler input to Linalg on Tensors. Afterward IREE will call the LinalgToLLVMGPU codegen passes.
Once we get into LinalgToLLVMGPU passes we first do bufferize to generate Linalg on Buffers. Then we apply MLIR generic passes to convert linalg to SCF dialect and then SCF to Standard dialect. After that we convert Standard dialect to LLVM+NVVM dialect.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#example","title":"Example","text":"Save the following mlir in /tmp/add.mlir
func.func @add(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {\n %0 = tensor.empty() : tensor<4xf32>\n %1 = linalg.generic {\n indexing_maps = [\n affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = [\"parallel\"]}\n ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>)\n outs(%0 : tensor<4xf32>) {\n ^bb0(%in: f32, %in_0: f32, %out: f32):\n %2 = arith.addf %in, %in_0 : f32\n linalg.yield %2 : f32\n } -> tensor<4xf32>\n return %1 : tensor<4xf32>\n}\n
# First compile into a VM bytecode module.\n$ ../iree-build/tools/iree-compile \\\n--iree-hal-target-backends=cuda \\\n/tmp/add.mlir \\\n-o /tmp/add.vmfb\n\n# Run the module through CUDA HAL backend.\n$ ../iree-build/tools/iree-run-module \\\n--device=cuda \\\n--module=/tmp/add.vmfb \\\n--function=add \\\n--input=\"4xf32=[1 2 3 4]\" \\\n--input=\"4xf32=[2 2 2 2]\"\n\nEXEC @add\n4xf32=3 4 5 6\n
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/design-roadmap/","title":"Design roadmap","text":"A not-so-concise walkthrough of various IREE features that are in the design process and planned for future versions. A lot of the questions around how the IREE IR is designed and why certain components exist (such as the VM) hopefully become much clearer when seeing where we want to go with the infrastructure we are building (as opposed to where we currently are with our MVP slice). This document is not meant to encompass the entire design of any individual feature and if there's interest please say hi on the iree-discuss mailing list.
- Design roadmap
- Input Dialects
- Quantization
- flow: Data- and Execution-Flow Modeling
- Avoiding Readbacks with flow.stream
- Threading flow.stream through the CFG
- Predication of flow.dispatch
- Deduping flow.executables
- Rematerializing CSE'd Expressions
- Device Placement
- hal: Hardware Abstraction Layer and Multi-Architecture Executables
- Allow Targets to Specify hal.interfaces
- Target-specific Scheduling Specialization
- Buffer Usage Tracking
- Batched Executable Caching and Precompilation
- Target-aware Executable Compression
- Target-aware Constant Compression
- Command Buffer Stateful Deduplication
- Resource Timeline
- Transient Tensor Ringbuffer
- Timeline Semaphores on the Module ABI
- GPU-like CPU Scheduling
- vm: Lightweight Virtual Machine
- Coroutines for Batching and Cooperative Scheduling
- Cellular Batching
- Lowering to LLVM IR
- Improved Type Support
- Indirect Command Buffer/On-Accelerator Execution
"},{"location":"developers/design-docs/design-roadmap/#input-dialects","title":"Input Dialects","text":""},{"location":"developers/design-docs/design-roadmap/#quantization","title":"Quantization","text":"It's assumed that any work related to quantization/compression has happened prior to lowering into IREE dialects. Our plan is to use the proposed Quantization Transforms to achieve both training and inference-time quantization of types in a way that preserves maximum accuracy. IREE will support running with original unquantized floats in all cases, allowing for a smooth on-ramp to quantization and the gains in performance and reduction in model size that come from it.
As future work IREE would like to move beyond these transformation-directed approaches to quantization and interface directly to frontends which have a defined enough type system to represent accurate quantized (and otherwise compressed) computations directly, not relying exclusively on compiler-side type inference transforms.
"},{"location":"developers/design-docs/design-roadmap/#flow-data-and-execution-flow-modeling","title":"flow
: Data- and Execution-Flow Modeling","text":"The flow
dialect is designed to allow us to extract as much concurrency as possible from a program and partition IR into the scheduling and execution domains. Today we have the IR structure and transformation flow in place but have not yet got to the most interesting things such an infrastructure enables. A majority of the largest performance, latency, and memory usage improvements IREE can offer are determined first here and all following lowerings benefit. The fastest code is the code you don't execute and the smallest allocation is the allocation you don't make ;)
"},{"location":"developers/design-docs/design-roadmap/#avoiding-readbacks-with-flowstream","title":"Avoiding Readbacks with flow.stream
","text":"A majority of the readbacks we have today (manifested as flow.tensor.load.*
ops) will be removed when we have an HLO tensor->primitive conversion. There will still be cases when readbacks are required for correctness but they usually fall into a small set of usage patterns. For those that don't this is one place where IREE will warn about performance issues, allowing programs that perform suboptimally but encouraging authors to adjust their input model to enable better behavior. The IREE VM also has specific support for hiding readback latency in an efficient way via coroutines.
The most common case we are currently seeing in the IR is that of dynamic copies where the offsets are dependent on the result of previous computations. Source models may have top-k + gather operations, for example. These appear as a flow.stream
, a flow.tensor.load
, and then another flow.stream
that uses the loaded value for a flow.tensor.update
(or other operation):
%index_tensor = flow.ex.stream.fragment(...) -> tensor<i32> { ... }\n%index = flow.tensor.load %index_tensor : tensor<i32>\n%result = flow.ex.stream.fragment(%arg0 = %index : i32, ...) -> ... {\n %0 = flow.dispatch ...\n %1 = flow.tensor.update %0, %arg2[%index] : tensor<10xf32> -> tensor<1x10xf32>\n ...\n}\n
Today the flow.tensor.update
turns into HAL command buffer transfer operations that must have their offsets known at recording time. This is a limitation of vkCmdCopyBuffer
but not a fundamental limitation of any hardware. In fact several drivers implement copies as small built-in shader programs meaning that we could perform the same expansion here with the right primitives. This would allow, in the above example, both the index to be computed and the tensor to be updated within the same stream to entirely remove the host round-trip.
"},{"location":"developers/design-docs/design-roadmap/#threading-flowstream-through-the-cfg","title":"Threading flow.stream
through the CFG","text":"The current flow.ex.stream.fragment
, as denoted by the ex
perimental tag, is a temporary implementation designed to get the concept of streams lowered to the HAL dialect. For streams to be effective at modeling larger concurrency scopes they need to be able to move across branches in the CFG. This intuitively follows exactly what one would do if recording commands in C:
vkCmdCopyBuffer(cmd, ...);\nif (some_flag) {\nvkCmdBindPipeline(cmd, ..., pipeline_a);\n} else {\nvkCmdBindPipeline(cmd, ..., pipeline_b);\n}\nvkCmdDispatch(cmd, ...);\n
The corresponding flow
IR:
flow.stream.append[%s0](...) {\n flow.tensor.update ...\n }\n %b = arith.cmpi ne %some_flag, ...\n cond_br %b, ^a(%s0), ^b(%s0)\n^a(%s1):\n flow.stream.append[%s1](...) {\n flow.dispatch @pipeline_a, ...\n }\n br ^end(%s1)\n^b(%s2):\n flow.stream.append[%s2](...) {\n flow.dispatch @pipeline_b, ...\n }\n br ^end(%s2)\n^end(%s3):\n ...\n
This allows the entire stream to be lowered into one command buffer without the need for any host round-trips. The conversion into the flow
dialect will walk the CFG and attempt to thread the flow.stream
values through so long as there are no external dependencies.
"},{"location":"developers/design-docs/design-roadmap/#predication-of-flowdispatch","title":"Predication of flow.dispatch
","text":"While the flow.stream
threading through the CFG can remove many of the simpler conditional dispatches there will always be some that will have their execution dependent on the result of prior dispatches. For these a flow.cond_dispatch
will allow a condition to be provided that must be true for the dispatch to actually be performed.
For targets that natively support predication in their command buffers (such as D3D12's ID3D12GraphicsCommandList::SetPredication) this provides a host round-trip-free way of conditionally executing dispatches and transfers. Unfortunately Vulkan support is still lacking, though Nvidia supports the VK_EXT_conditional_rendering extension that exposes the same behavior.
For targets that do not support predication natively it's still possible to emulate predication with indirect dispatches. In this model the workgroup counts normally used to dispatch execution are sourced from another device buffer at the time the dispatch is made instead of sourced from the command buffer at the time the dispatch is recorded. Degenerate dispatches with counts of 0, 0, 0
allow for effective neutering of the dispatch with minimal overhead (vs. the significant penalty of a host round-trip!).
By modeling such predication at the flow
level we are able to lower into the HAL with target-aware predication semantics and fuse indirect dispatch workgroup count calculations into existing dispatches already being performed such that overhead is reduced.
"},{"location":"developers/design-docs/design-roadmap/#deduping-flowexecutables","title":"Deduping flow.executable
s","text":"While still in the flow
dialect, the executables are target-agnostic. This makes simple IR tree diffing a potential solution to deduplication. Since most of the dispatches originate from the same source-language library calls in input frameworks there's a high likelihood of duplication, and depending on when inlining is performed we may have stronger or weaker ability to perform the deduplication. Thanks to the MLIR canonicalization pass (that ensures ops are rearranged into consistent canonical representations) the IR comparisons can be done rather trivially.
"},{"location":"developers/design-docs/design-roadmap/#rematerializing-csed-expressions","title":"Rematerializing CSE'd Expressions","text":"Common subexpression elimination is performed many times during lowering, however there comes a point where the CSE can introduce false dependencies and additional allocations that are otherwise avoidable. For example if a broadcasting operation is CSE'd and then the result is used by two or more operations that are scheduled independently what would have been a relatively cheap lowering of the broadcast to a simple index remapping now becomes an additional dispatch, materialization of an intermediate tensor, and a barrier:
%bcast = \"mhlo.broadcast_in_dim\"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>\n%mul1 = mhlo.multiply %arg0, %bcast : tensor<1024x10xf32>\n// (pretend something here that prevents fusion)\n%mul2 = mhlo.multiply %arg1, %bcast : tensor<1024x10xf32>\n
%bcast = flow.dispatch.region(%cst : tensor<f32>) -> tensor<1024x10xf32> {\n %0 = \"mhlo.broadcast_in_dim\"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>\n return %0 : tensor<1024x10xf32>\n}\n// a barrier will be required here\n%mul1 = flow.dispatch.region(%arg0 : tensor<1024x10xf32>, %bcast : tensor<1024x10xf32>) -> tensor<1024x10xf32> {\n %1 = mhlo.multiply %arg0, %bcast : tensor<1024x10xf32>\n return %1 : tensor<1024x10xf32>\n}\n%mul2 = flow.dispatch.region(%arg1 : tensor<1024x10xf32>, %bcast : tensor<1024x10xf32>) -> tensor<1024x10xf32> {\n %2 = mhlo.multiply %arg1, %bcast : tensor<1024x10xf32>\n return %2 : tensor<1024x10xf32>\n}\n
Instead the broadcast should be rematerialized inside of both dispatch regions as the cost of doing so is significantly less in compute resources and then the intermediate tensor will not be required at all. At first it may seem counter-intuitive to undo such a critical optimization as CSE (both to code size and often to compute), but here it's something we must carefully balance while looking at the whole system. It gets even more important when considering multi-device execution as the cost of sharing memory and synchronizing may be extremely non-trivial.
"},{"location":"developers/design-docs/design-roadmap/#device-placement","title":"Device Placement","text":"While still within the flow
dialect we have the ability to easily split streams and safely shuffle around operations. Target execution backends can opt into such behavior to ensure that device restrictions such as maximum in-flight memory, maximum scheduling depth, and capabilities are observed. For heterogeneous configurations the intent is that certain operations, dispatches, and streams can be attributed to specify which device categories they should be lowered. The constraint solving that takes place can be provided with generic heuristics (\"big GEMMs go on the accelerator\"), profile-guided databases based on benchmarks, learned traits via ML, etc.
"},{"location":"developers/design-docs/design-roadmap/#hal-hardware-abstraction-layer-and-multi-architecture-executables","title":"hal
: Hardware Abstraction Layer and Multi-Architecture Executables","text":"As the IREE HAL is designed almost 1:1 with a compute-only Vulkan API many of the techniques classically used in real-time graphics apply. The benefit we have by modeling our usage of such a low-level API in IR is that the normal work - some of which is very non-trivial - for managing allocations, tracking resource lifetime, and ensuring proper synchronization/barriers is something we can apply the full force of an offline compiler against.
"},{"location":"developers/design-docs/design-roadmap/#allow-targets-to-specify-halinterfaces","title":"Allow Targets to Specify hal.interface
s","text":"The hal.interface
op specifies the ABI between the scheduler and the device containing the buffer bindings and additional non-buffer data (parameters, shapes, specialization flags, etc). Today a na\u00efve ordering is used uniformly for all targets however it is possible for target backends to opt into providing their own interfaces based on target configuration. The same hal.executable
may have multiple interfaces and the same backend may use one or more. This is useful for when target capabilities may vary at runtime, such as the number of available storage buffer bindings in Vulkan. By exposing a few hal.interface
variants with different binding amounts the Vulkan backend could make better use of the larger number of bindings available at runtime while still providing support for smaller configurations.
Once we have multiple hal.interface
s defined for executables the scheduler needs to emit HAL ops that properly switch between them. By having a canonical form for bindings we can ensure that only the differences between the interfaces will need additional code.
"},{"location":"developers/design-docs/design-roadmap/#target-specific-scheduling-specialization","title":"Target-specific Scheduling Specialization","text":"Though the flow
dialect attempts to fuse as many ops as possible into dispatch regions, it's not always possible for all target backends to schedule a region as a single dispatch. A classic example is algorithms like parallel reduction commonly used on GPUs that may require many dispatches to identical executables, while other algorithms may vary the executables they use based on the input parameters such as shape or the target runtime device support.
By default the flow.dispatch
executable translation to hal.executable
s is performed 1:1 and it is assumed that a single dispatch is required. Extending target backends with scheduling interfaces (enabling them to opt into different scheduling behavior) will allow the backends to emit any number of hal.executable
s and any stream commands (such as additional dispatches or transfers) they may need. This is effectively equivalent to what would be done at runtime only because we are still operating on IR prior to buffer allocation and can use the hal
ringbuffer primitive. Through this we can elide many of the allocations that would otherwise be required at runtime (and the concurrency-limiting false dependencies that usually come along with scratch memory).
Since the algorithm used may vary based on the parameters of the dispatch (such as the shape of the reduction which may be dynamically determined) scheduling specialization may occur even when targeting a single backend. In many cases folding and canonicalization can eliminate the overhead as whether one dynamically computed workgroup size is used instead of another the same IR is present.
"},{"location":"developers/design-docs/design-roadmap/#buffer-usage-tracking","title":"Buffer Usage Tracking","text":"Many explicit hardware APIs require knowing how buffers are used alongside with where they should be located. For example this additional information determines caching policy on buffer accesses (write-through, write-back, etc), visibility of writes across compute units, and the possible MMU properties that may need to be maintained/matched for the buffer. By using the SSA-form value-semantics of the MLIR tensor
as used in the flow
dialect we have complete information of where buffers may be used or at least where they enter or leave regions where we can derive such information.
Analysis passes can run over IR to attribute tensors such that when allocation is performed when lowering to the hal
dialect we do so from an allocator compatible with where the buffer will be used, with memory types chosen based on the potential cost and location of operations performed (write-only on host vs. read-write on host and device, etc), and with usage bits indicating what kind of operations may be performed on the buffer. Many of these are local transformations as most buffers are only live within very small regions such as the flow.stream
encompassing their usage.
Traditional systems need to either use very permissive buffer properties or heuristics that can introduce additional non-trivial overhead when such heuristics are incorrect. For example, OpenGL had several such usage hints that drivers were then able to use but almost no drivers behaved as desired in all cases and it lead to additional memory ghosting, copies, readbacks, and unpredictable performance. For almost all uses of the buffers within an IREE invocation we instead can know precisely where and how buffers may need to be moved and do it a minimum number of times if it is required.
"},{"location":"developers/design-docs/design-roadmap/#batched-executable-caching-and-precompilation","title":"Batched Executable Caching and Precompilation","text":"For targets that may require runtime preprocessing of their executables prior to dispatch, such as SPIR-V or MSL, the IREE HAL provides a caching and batch compilation mechanism based on Vulkan's Pipeline Cache.
Today each executable is compiled on-demand and cached only for the process lifetime. Though some drivers may provide their own caching we can make better use of the explicit caching and compilation behavior with the additional information we have in the compiler.
For any given entry point (or group of entry points) into an IREE module we can perform reachability analysis to know which executables may be executed when that entry point is invoked. In this way we can emit pre-invocation compilation checks (similar to an std::call_once
block) that provides all required executables for compilation and allows more efficient compilation through multithreading the compiler invocations. These same compilation caching functions can be exposed and invoked manually by an application to force pre-compilation when it is least likely to impact the user, such as a post-install/first-run step or concurrently while other application features are loading.
We can use zero or more scoped caches for executables within a module. Completely dynamic modules (such as those emitted in eager-mode usage) may avoid the caching overhead entirely, while modules that have several primary usage modes (such as training and inference) may choose to use independent caches for each such mode.
The caches generated can then be retrieved and saved by the hosting application. Upon the next execution the application can provide the caches and if still valid they will be used to avoid compilation.
"},{"location":"developers/design-docs/design-roadmap/#target-aware-executable-compression","title":"Target-aware Executable Compression","text":"An advantage of representing executable binaries in IR after translation is that we can apply various post-compilation compression and minification techniques while still knowing precisely where the executable will be used. This is extremely important for SPIR-V as it is not designed to be a small at-rest format. Though the biggest lever we have to control generated code size is higher-level deduplication and specialization there will still be a sufficiently large number of executable binaries we will need to embed within the final modules and having targeted approaches for reducing their size beyond just \"gzip everything\" is very powerful.
For example, SMOL-V is a fantastic lossless SPIR-V compression technique that, when coupled with modern dictionary-based compression algorithms, can save significant binary size. As a data point, the SPIR-V corpus SMOL-V uses for testing goes from 4.8MiB of raw SPIR-V to 348KiB of compressed SMOL-V.
Combined with Batched Executable Caching and Precompilation we can easily use shared dictionaries and other cross-artifact compression in a relatively plug-in way.
"},{"location":"developers/design-docs/design-roadmap/#target-aware-constant-compression","title":"Target-aware Constant Compression","text":"It's still an area that needs more research but one goal of the IREE design was to enable efficient target- and context-aware compression of large constants (typically model weights/parameters/embeddings). This may mean reusing existing hardware compression formats on GPUs, ML accelerator-specific formats, or very-low-bit-depth (1-4 bit per value) quantization techniques that cannot be directly used without first decompressing. The inspiration here is formats like Crunch and Basis Universal that perform \"supercompression\", and we may even be able to use these directly as then we can make use of GPU hardware samplers to do the 4-bit to 32-bit decompression, etc.
"},{"location":"developers/design-docs/design-roadmap/#command-buffer-stateful-deduplication","title":"Command Buffer Stateful Deduplication","text":"The IREE HAL - much like Vulkan it is based on - eschews much of the state that traditional APIs have in favor of (mostly) immutable state objects (pipeline layouts, pipeline states, descriptor sets, etc). There are still a few stateful entry points in the API, though, and deduplicating or reordering redundant calls can reduce both IR, API, and execution overhead.
The key place this will have the largest impact is around descriptor set bindings and push descriptors, both of which are state and can have non-trivial setup overhead. A canonicalization for such commands that inspects the target hal.command_buffer
to see if the same state was set prior and code motion to move such commands out of loop bodies when possible would be helpful.
"},{"location":"developers/design-docs/design-roadmap/#resource-timeline","title":"Resource Timeline","text":"A core concept of the IREE scheduler that allows for overlapping in-flight invocations is that of the resource timeline. This identifies module state that can be in use by multiple invocations and assigns timeline milestones denoting when the resource will be in the appropriate state for the current invocation to proceed. Conceptually it is like an epoch-based synchronization mechanism as commonly found in garbage collectors to allow for lock-free asynchronous memory reclamation.
The advantage we have in the IR is that we know both the usage of all resources thanks to buffer usage tracking and the synchronization domains of all resources (in most cases). This allows us to effectively assign one timeline semaphore per writeable resource while in practice having far fewer than 1:1, as for example if two resources are only ever written in the same command buffer only one semaphore is needed to signal the completion of both writes.
By transforming IR to sink all resource reads and writes closest to where the value is used we can enlarge the time windows that can overlap across invocations that may share those resources. This is similar to what out-of-order CPUs do with register renaming/reorder buffers/etc and something we can apply some traditional instruction scheduling techniques to (only here our 'instructions' are entire command buffer dispatches/transfers).
Two degenerate cases of this approach are that of resource indirection (util.ptr<tensor<T>>
) and dynamic resource shapes. In these two cases it may not be possible to continue recording commands even if we are able to ensure execution is appropriately synchronized. This is where indirect dispatch, predication, indirect command buffers, and VM coroutines can all help cover for the times where we are unable to transform away the indirection or emit shape logic without data dependencies.
"},{"location":"developers/design-docs/design-roadmap/#transient-tensor-ringbuffer","title":"Transient Tensor Ringbuffer","text":"(When properly implemented) almost all buffers required during execution never escape the command buffers they are used in or a single VM invocation. We can trivially identify this from the explicit captures of flow.stream
and flow.dispatch
ops and the fact that all tensor types have value-semantics. Only those tensor values loaded-from/stored-to module state or that cross the exported module function boundary need special consideration while almost everything else can live transiently only so long as it is required during execution.
Thanks to this information about buffer usage and lifetime we can use a ringbuffer to store the transient tensor data and other required data reservations such as uniform buffers used to pass dynamic parameters (shapes, flags, etc) into dispatches. This gives the compiler and the application a knob that allows them to control maximum concurrency (by having a very large ringbuffer) or maximum memory usage (by having a minimally small ringbuffer).
Allocating tensors from the ringbuffer does not require sophisticated runtime packing as we can emit IR to calculate required sizes for dynamically shaped tensors. Whether a basic block reserves %sz = arith.constant 42 : index
bytes or %sz = std.muli %cst, %dyn_dim : index
bytes doesn't materially change how the allocations are performed. Since almost all usage involves simple write head bumps there is no need for ahead-of-time memory planning or large fixed allocations, and since no buffer within the ringbuffer can alias we can have coarse (read: low overhead) guarantees about the availability of certain regions of the ringbuffer (\"when this event is signaled all prior ringbuffer writes have completed\").
Usually any planning we may want to perform can be done in IR via code motion. For example applying traditional algorithms used to reduce register pressure will help us attain narrower live windows within the ringbuffer leading to a larger number of in-flight operations for the same ringbuffer memory usage.
We may end up using both a classical ringbuffer and a variant known as the bip buffer because it is better for descriptor set utilization (as we can provide many dispatch parameters with a single base offset bound once at the beginning of a region).
"},{"location":"developers/design-docs/design-roadmap/#timeline-semaphores-on-the-module-abi","title":"Timeline Semaphores on the Module ABI","text":"Functions calls made across modules (either from C++ into the VM, VM->VM, or VM->C++) should be able to define timeline semaphores used to wait and signal on the call. We can do this by making all exports automatically have the semaphores and then make invocations populate them if they were not provided by the caller. In this way we can allow multiple invocations of exported functions to chain naturally with internal asynchronous workloads, turning most IREE invocations into just recording of command buffers that can never block.
When combined with VM coroutine support we even have the ability to interleave any required host execution between the wait and signal semaphores provided such that the caller never knows on which device execution is taking place. It's still possible to provide synchronous wrappers that emulate blocking behavior but by having the core system designed around a single system-supported primitive we avoid the need for additional things like interrupt watchdog threads, implicit blocking, and other pitfalls.
"},{"location":"developers/design-docs/design-roadmap/#gpu-like-cpu-scheduling","title":"GPU-like CPU Scheduling","text":"One approach to using multiple cores on a CPU is to perform interior parallelization of operations such as OpenMP or library-call-based custom thread pools (gemmlowp). This works when each individual operation is relatively costly vs. potential pipeline bubbles caused by work spinning down near the end of an operation and spinning up at the beginning of the next.
IREE is designed to handle many more workloads - some of which have very narrow shapes but very deep pipelines (like search algorithms) - such that the above approach of multithreading within ops becomes a bottleneck. These workloads are traditionally very poorly handled by frameworks and issues with oversubscription, pipeline stalls, and suboptimal system schedulers (such as on Android) can lead to more time being spent thrashing about than actually executing real work.
The approach we take here is to treat the cores of a CPU as if they were computation units on a GPU, each able to perform some set of heterogeneous work independent of others units. This means that the concurrency we are trying to model at the flow
level and communicate to the runtime via the hal
that explicitly states which dispatches can overlap and the size of the workgroups can trivially be used to distribute this work over many cores exactly as a GPU would do it. Integration with library calls that may require their own threading (such as Ruy) requires that they be able to use the IREE thread pool instead of their own.
In this way we can avoid pipeline bubbles and other latency-inducing unpredictable scheduling. This does not mean that we treat individual units of work at the same scale as we would for GPUs, but instead that we tile and have one or more processing units that allows us to work on those tiles. Whether the tile size is defined by a library call contract, heuristics, or empirically is TBD, but expect workgroup sizes in the thousands to millions of invocations vs. normal GPU workgroup sizes in the dozens to hundreds of invocations.
To achieve this style of scheduling efficiently we'll likely use something like marl as the scheduler. Marl provides cross-platform low-overhead fibers and is compatible with this style of scheduling as it was built for the Swiftshader software rasterizer.
Even if IREE was only targeting CPUs the assertion is that we would still want to schedule this way and it's only an incidental benefit that if building for heterogeneous targets the scheduling code may be shared (just with a different divisor for workgroup count calculations).
"},{"location":"developers/design-docs/design-roadmap/#vm-lightweight-virtual-machine","title":"vm
: Lightweight Virtual Machine","text":"The VM is designed as a dynamic linkage ABI, stable bytecode representation, and intermediate lowering IR. Many of the optimizations we can perform on it will benefit all use cases (such as when lowering to LLVM IR) by allowing higher-level program transformations around synchronization that are difficult to perform on arbitrary LLVM IR.
"},{"location":"developers/design-docs/design-roadmap/#coroutines-for-batching-and-cooperative-scheduling","title":"Coroutines for Batching and Cooperative Scheduling","text":"One of the largest features currently missing from the VM is coroutines (aka user-mode fiber scheduling). Coroutines are what will allow us to have multiple in-flight invocations into a module - some of which may be waiting on external events - without the need for complex multithreading logic or state machine machinations.
In many cases once semaphores are exposed to callers we will not need to yield in the VM. The user will call into the module with provided semaphores, the work to perform will be recorded to one or more command buffers and submitted to the device, and then control will return to the caller immediately.
In cases requiring host readbacks that we were not able to remove, however, additional VM code may need to run prior to when the final semaphore is signaled. To preserve the asynchronous interface and immediate execution guarantees the compiler can emit explicit yield points (vm.yield
) that are known-good locations for yielding (such as most resources not required after the yield having been flushed/discarded, partial synchronization scope availability if other work may be able to execute concurrently irrespective of the yielded coroutine, etc).
When the VM encounters the yield at runtime it will suspend the coroutine until a defined condition is met. Many coroutines can be in various states at any given time and - thanks to the resource timeline - can still be memory safe. For example if two stateless invocations are made with a common wait semaphore both can be recorded and submitted without waiting on each other. If there is internal module state accessed the invocations are implicitly ordered by invocation order (similar to what Vulkan calls API order) based on internal resource timeline semaphores.
Waking the coroutines can be performed by an application-provided callback in the case of the application already having a periodic event which is doing bookkeeping (such as frame end callbacks when rendering or Looper idle events on Android), giving direct control over the frequency and location which IREE utilizes to perform additional work. A helper will be provided as well that runs a dedicated IREE thread to do this, but the expectation is that applications can often do a better (and importantly more predictable) job.
By utilizing coroutines IREE will have a way to fill traditional pipeline bubbles even with execution from the same module (let alone across modules) in the situation where host readbacks or other logic is required. This increases overall throughput and utilization while reducing host wakeups as many coroutines can be processed at once to submit new work to the device queues, though it does not help reduce per-invocation latency.
External code such as the HAL implementation or user ops may provide the wait handles used for continuation. For example, the HAL can expose a function that yields and wakes only when one or more timeline semaphores reach their target values:
// submit work\nhal.device.yield %semaphore4 >= %sem4_target, %semaphore5 >= %sem5_target\n// continue here, possibly much later in time\n
"},{"location":"developers/design-docs/design-roadmap/#cellular-batching","title":"Cellular Batching","text":"Though coroutines help throughput there is a way we've found to reduce latency that's been documented as cellular batching. This same technique has been implemented in prior internal systems and is one of the motivating design goals for IREE's creation. The core idea is to identify small uniform work that can be partitioned and scheduled greedily such as to enable batching or reduce associated invocation costs (such as refreshing accelerator SRAM/caches with new parameters). This usually manifests as finding large GEMM/GEMV operations using the same fixed parameters and either dynamically increasing the batch size by adding the waiting work (without deferring the actual execution time) or sequencing them back to back to ensure better cache utilization. Which approach is taken depends on any data dependencies that may be present (such as LSTM state feedback edges).
With the foundation of coroutines in IREE it's possible to yield execution at any given point - including during command buffer recording - and wake on specific conditions. A majority of the logic can be built into the module itself with very little need for runtime machinery, as shared VM variables can be used to track pending work across invocations (even from different parts of the program) and flush based on logic wholly controlled by the user or compiler (such as count/max time latency/etc limits). This allows for the large variety of scheduling behavior various applications may want to use, such as a zero-latency batch-only-within-this-invocation to a Nagle's Algorithm-esque time or limit based behavior or even some learned model-specific windowing.
Design work is still required on how to represent this in IR but the current thought is to model the regions in which deferred execution is possible and beneficial and allow additional transformations during lowering to the VM. This is similar to how the async-await behavior works in C# where the async keyword is just sugar that expands to additional generated helper utilities.
A simple strawman representation for sequential dispatch may look like:
hal.scheduling_policy @defer_policy {\n // max time, max count, max live memory, etc\n}\n...\nhal.command_buffer.dispatch.deferred @defer_policy, @dispatch, ...\n// vm.yield added here during lowering\n
There are many cases to explore and as cellular batching can have performance benefits of several orders of magnitudes it'll be one of the primary areas of research in the long-term.
"},{"location":"developers/design-docs/design-roadmap/#lowering-to-llvm-ir","title":"Lowering to LLVM IR","text":"For scenarios where dynamic module loading is not required and entire modules can be compiled into applications we can lower the VM IR to LLVM IR within MLIR's transformation pipeline. Instead of embedding vm.call
ops that are dispatched at runtime to things like the HAL we can instead lower to llvm::CallInst
to runtime-resolved function pointers. This still enables all of the flexibility of heterogeneous/runtime-determined devices, pluggable diagnostics, and backend composition without any need for FlatBuffers or the VM bytecode interpreter.
The VM was designed to make such a lowering easy and the C-style struct-based function pointer registration for runtime modules was designed to make emitting code that used it fairly robust even when linked in dynamically such as when embedded in shared objects.
An extension of this is what we've been calling 'runtimeless mode', where the IREE VM linkage code is statically linked into the binary alongside the generated module LLVM IR. If only a single HAL backend is linked in then (with some build-fu) we should be able to get call devirtualization to reduce code size to precisely the functionality used by the module.
"},{"location":"developers/design-docs/design-roadmap/#improved-type-support","title":"Improved Type Support","text":"Currently the VM only supports two types: i32
and vm.ref<T>
. This is an intentional limitation such that we can determine what is really needed to express the scheduling we perform, with the idea being that such a limited model will make it easier to use techniques like indirect command buffers to compile the VM itself to an accelerator executable that dispatches work without host involvement.
As we port more models we may find a few primitives that are worth bringing into the VM design such that it's worth potential complications to future porting. These includes types like f32
(for simple float calculations/comparisons), list
/dict
(easier python compatibility), and vector<4xf32>
(for simple inline calculations that are not worth dispatch overhead/synchronization).
"},{"location":"developers/design-docs/design-roadmap/#indirect-command-bufferon-accelerator-execution","title":"Indirect Command Buffer/On-Accelerator Execution","text":"Though IREE will use many different tricks such as predication to build deep pipelines there is still the requirement that the command recording and submission happens on the host CPU. Though the cost of this in terms of latency and power use can be minimized by coalescing and timelines there is still the possibility of non-trivial roundtrips being introduced that limit performance. For particular applications like low-power always-on compute or where there is significantly branchy behavior (such as search algorithms) it is important that the decision making logic as to what is dispatched runs as close to real-time as possible within the execution pipeline.
The IREE VM is designed to be runnable on-device in a secure and cooperative way (no pointers, indirect buffer handles to allow for memory space rearrangement op-to-op, deterministic execution and explicit yield points, etc).
The recent efforts to bring indirect command buffers to Vulkan and Metal's Indirect Command Buffers (that both derive inspiration from NV_command_list) are one such target for this. Either by lowering the VM IR to LLVM IR or SPIR-V, by a special conversion to target-specific forms, or by actually executing the VM bytecode directly on-device (it's ~1000 LoC) we should be able to prototype what full on-device usage is like. Even if only some VM functions the compiler deems useful to schedule on the device are used and the rest run on the host (particularly those functions calling imported functions) some of the most costly logic that creates tight coupling of the host and device scheduling can be limited.
"},{"location":"developers/design-docs/function-abi/","title":"Function ABI","text":"Note
Authored December, 2019
Updated August, 2021
A key job of the IREE compiler and runtime is capturing function call semantics from the originating system and providing mechanisms so that invocations can be performed in as similar a way as possible in various target languages. In general, this requires additional metadata on top of the raw characteristics of a function. Where possible, this is done by attaching attributes to a function.
iree.abi
: JSON encoded description of the function's calling convention.
"},{"location":"developers/design-docs/function-abi/#v1-abi","title":"V1 ABI","text":"This is the default ABI supported by the IREE VM invocations. It attempts to provide a default calling convention that can be used without further reflection metadata but which may be enhanced with it.
It natively allows monomorphic functions to be exported where arguments and results are composed of the following types:
"},{"location":"developers/design-docs/function-abi/#value-types","title":"Value Types:","text":" - Byte aligned integer type (i8, i16, i32, i64)
- Floating point value (f16, f32, f64)
"},{"location":"developers/design-docs/function-abi/#reference-types","title":"Reference Types:","text":" -
ND-Array buffers of Value Types:
- Simple: Packed, C-layout
- Strided: Arbitrary layout with strides (future)
-
String (byte arrays)
-
Opaque reference object
"},{"location":"developers/design-docs/function-abi/#sequence-types","title":"Sequence Types:","text":" - Tuples: fixed length lists where each position has its own type bound
- Homogenous list: lists of arbitrary size where a single type bound applies to all elements
The intent with these low level types is that calling conventions can be synthesized to bind arbitrary high level, domain/language specific signatures to these types, possibly by way of additional reflection metadata.
"},{"location":"developers/design-docs/function-abi/#representations","title":"Representations:","text":"The above are all representable with native constructs in the VM:
-
ValueType:
- Runtime:
iree_vm_value
- Compile Time: primitive MLIR integer/floating point types
-
Simple ND-Array Buffer:
- Runtime:
iree_hal_buffer_view
- Compile Time:
tensor<>
-
String:
- Runtime:
iree_vm_list
containing i8
- Compile Time:
!util.list<i8>
-
Tuple:
- Runtime:
iree_vm_list
of variant - Compile Time:
!util.list<?>
- Note that these are statically type erased at the boundary.
-
TypedList (homogenous):
- Runtime:
iree_vm_list
of T
- Compile Time:
!util.list<T>
"},{"location":"developers/design-docs/function-abi/#extended-type-calling-conventions","title":"Extended Type Calling Conventions","text":"While the above features of the native ABI may be sufficient for direct use by various programs, many programs and callers will need to represent various higher level types, consistently mapping them to the above facilities. This section describes calling conventions for various higher level types which do not map 1:1 to the above. Not all source language types are representable, and extending these calling conventions (and the fundamental types above) is demand driven.
All of these calling conventions presume that the arity of the arguments/results of the raw function matches the user-level function, meaning that the calling convention is specified per argument/result. Higher-level whole function transformations may also exist for some domains but are outside of the scope of this specification.
"},{"location":"developers/design-docs/function-abi/#structure","title":"Structure","text":"A Structure
is a common enough entity to have a dedicated calling convention. In C-like languages, this may just be a struct
. In Python, it is typically a dict
with an associated schema providing a name and type bound for each of its slots. In both, its slots are of fixed arity.
In this convention, such a structure is represented as a Tuple
in the native calling convention (i.e. !util.list
of variant type). The order of the elements of the tuple are the natural order of the structure, where that is either:
- For a C-like system where order is determinate, it is the order of declaration.
- For a name-based system (i.e. bind to
dict
) where no order is defined, the natural order will be the lexically sorted order of the keys.
"},{"location":"developers/design-docs/function-abi/#string","title":"String","text":"Most languages interop between byte arrays (i.e. the native ABI String
type) by way of applying an encoding. Such strings are just a sequence of bytes (i.e. !util.list<i8>
).
"},{"location":"developers/design-docs/function-abi/#typed-list","title":"Typed List","text":"High level lists which all share the same type bound are represented as a TypedList
in the native ABI.
"},{"location":"developers/design-docs/function-abi/#ndarray-of-reference-types","title":"NDArray of Reference Types","text":"NDArrays of reference types are considered separately from those of value types. Internally, the code generated for them is completely different from what gets generated for numeric based arrays (i.e. has ref-counting, ownership semantics, non-POD, etc). These types are permitted for completeness, not necessarily performance: by nature they are already indirected and have overheads.
In the native ABI, these are represented as a composite tuple type (i.e. today a list since sugar for tuple is not yet defined): !iree.tuple<!util.list<T>, !util.list<index>>
. The first element of the tuple is the list of values, packed with a C-Layout and the second element is the list of dimension sizes.
"},{"location":"developers/design-docs/function-abi/#reflection","title":"Reflection","text":"Additional reflection metadata may be encoded in a custom JSON form, providing additional typing hints for arguments and results. If present, this will be a reflection attribute with key d
, containing a serialized JSON object.
The JSON object contains:
a
(array): List of type records for each argument. r
(array): List of type records for each result.
Type records are one of:
-
A string naming a primitive type:
i[0-9]+
: Integer type with given bit width f[0-9]+
: IEEE floating point type with given bit width bf16
: BFloat16
-
JSON null
: A null reference value
-
\"unknown\"
: An unknown/unmapped type
-
An array, interpreted as a tuple describing a compound type.
"},{"location":"developers/design-docs/function-abi/#compound-type-tuples","title":"Compound type tuples","text":"A compound type tuple has a type identifier as its first element, followed with type specific fields:
[\"named\", \"key\", {slot_type}]
: Associates a name with a slot. This is used with the root argument list to denote named arguments that can be passed positionally or by keyword. [\"ndarray\", {element_type}, {rank}, {dim...}]
: For unknown rank, the rank
will be null
and there will be no dims. Any unknown dim will be null
. [\"slist\", {slot_type...}]
: An anonymous structured list of fixed arity and slot specific types. If there are gaps in the list, empty slots will have a null
type. [\"stuple\", {slot_type...}]
: Same as slist
but some languages differentiate between sequences represented as lists and those represented as tuples (read-only lists). [\"sdict\", [\"key\", {slot_type}]...]
: An anonymous structure with named slots. Note that when passing these types, the keys are not passed to the function (only the slot values). [\"py_homogeneous_list\", {element_type}]
: A Python list of unknown size with elements sharing a common type bound given by element_type
.
"},{"location":"developers/design-docs/invocation-execution-model/","title":"Invocation execution model","text":"Authored June, 2022
This documents the behavior of the user-visible invocation mechanism IREE uses to schedule program execution. Internally IREE uses a very similar modeling for tracking its internal workloads and in kind carries that down to target APIs and devices that themselves use a very similar model. The intent is to expose the device model in an abstracted way that allows for the full capture and communication of the execution intent to be propagated to the hardware that executes it. Though here we focus on the user-visible portion of execution there is really only one \"IREE execution model\" and the entire stack follows the same design. At its core this design is just an instantiation of an out-of-order execution algorithm such as those originating from the 1960's.
"},{"location":"developers/design-docs/invocation-execution-model/#glossary","title":"Glossary","text":"stateDiagram\n state UserApplication {\n direction BT\n state Context0 {\n ModuleA-->ModuleAState0\n ModuleB-->ModuleBState0\n }\n state Context1 {\n ModuleA-->ModuleAState1\n ModuleB-->ModuleBState1\n ModuleC-->ModuleCState1\n }\n state ModuleA {\n @func1\n @func2\n }\n state ModuleB {\n @func3\n @func4\n }\n state ModuleC {\n @func5\n }\n }
"},{"location":"developers/design-docs/invocation-execution-model/#program","title":"Program","text":"An IREE program is a collection of modules instantiated in a context from which invocations can be made. Invocations are ordered on a user-controlled timeline that uses fences to define the execution order requirements to enable out-of-order execution. A hosting user application may have multiple programs or multiple instances of the same program available and running invocations at a time across multiple timelines.
"},{"location":"developers/design-docs/invocation-execution-model/#module","title":"Module","text":"Modules define executable code and data that can be loaded, linked, and run \u00e0 la ELF shared libraries. Modules may be implemented as C/C++, generated bytecode or C sources from the IREE compiler, or any other mechanism that can run code and implement the iree_vm_module_t
interface. Modules on their own are read-only and can be reused across many contexts.
Traditional ML runtimes would use a model (graph, etc) as their module representation. In IREE everything is a module including runtime subsystems like the HAL and user-provided custom code. This ensures that anything IREE can do can be externalized and replaced by users without needing to modify the core IREE code.
"},{"location":"developers/design-docs/invocation-execution-model/#context","title":"Context","text":"A collection of modules are linked and instantiated in a context. Each context operates independently and carries its own copies of mutable module state. Invocations execute within a context scope and hosting applications coordinate across contexts as required. Contexts are cheap to create (microseconds) and retain (~100B + program state) such that users can decide how to manage them based on their scenario.
Traditional ML runtimes would call these \"sessions\" but in IREE everything is a program. Whether the program is stateful or stateless and how the program is invoked is up to the program author.
"},{"location":"developers/design-docs/invocation-execution-model/#invocation","title":"Invocation","text":"An invocation represents a single call into a module exported function using the program state stored in a context. Users can decide whether to perform synchronous blocking invocations or asynchronous non-blocking invocations per-call; the behavior of the invocation is independent from the target function and a user program may contain a mix of both.
As an example a user program may synchronously invoke a @query_output_shapes
function to preallocate storage for an asynchronous @execute_in_place
function to write into.
"},{"location":"developers/design-docs/invocation-execution-model/#timeline","title":"Timeline","text":"A timeline represents the observable order of execution. Users define their own timelines and communicate them to IREE via fences. Timelines do not match up with the order of invocations unless the user dictates they must by way of fences. In the absence of fences all invocations execute in an arbitrary order and they may execute concurrently just as threads in C with no barriers.
Each timeline can be thought of as an independent clock domain that may operate asynchronously at its own frequency with only fences acting to tie separate timelines together. This directly mirrors real hardware constraints like clock domain crossing as each execution scope (thread on core, driver calls to queues, kernel queues to device queues, device queues to compute unit queues, etc) is naturally operating at different rates and well-designed systems must tolerate that variability.
"},{"location":"developers/design-docs/invocation-execution-model/#fence","title":"Fence","text":"A fence is a specific point of progress in one or more timelines acting as a barrier, fork, or join point. Fences only guard execution ordering and not any particular resources though users can use them to guard resources by defining when in time the resources are available for use.
Waits on fences are wait-until operations specifying that the timeline must reach at least a specific point. This allows for flexible reordering and deferral of execution as executors can pull forward scheduled work based on policy (run similar work together, etc).
"},{"location":"developers/design-docs/invocation-execution-model/#hardware-abstraction-layer-hal","title":"Hardware Abstraction Layer (HAL)","text":"The HAL is an optional feature of IREE that is used to provide a consistent interface across execution resources. It is used internally by IREE programs to define and submit work to devices and signal across them but may also be used by users to directly interface with hardware in a compatible way. Exposing the HAL API allows for users to efficiently manage their data and custom execution without expensive marshaling. Most users will only interact with HAL buffers as they work with their data but more advanced integrations can directly insert IREE into existing device contexts to transparently share scheduling and resources or insert their own code into IREE to pipeline custom execution.
"},{"location":"developers/design-docs/invocation-execution-model/#execution-by-timelines","title":"Execution by Timelines","text":"NOTE: this defines an execution scheme that IREE supports but a user may use one or more such schemes in a single program - just as a C application may mix single- and multi-threaded code within itself for different components.
The combination of invocations, timelines, and fences allows users to provide future knowledge to lower layers of the system by declaring their availability requirements and the lower layers are then able to execute the work out-of-order so long as the specified requirements are met. The primary goal when designing for such a system is to specify as few requirements as possible in order to provide the maximum amount of scheduling freedom to the implementation.
This makes timelines one of the most critical components of the interface. The purpose of invocations is to schedule work against one or more timelines and what happens within the invocations is an implementation detail of the program.
"},{"location":"developers/design-docs/invocation-execution-model/#sequential-execution","title":"Sequential Execution","text":"Here we say \"a user invokes a function to schedule execution on a timeline\" vs. a more traditional \"a user invokes a function to execute work\" and this manifests in the IREE ABI as invocations taking fences defining specific points on timelines of which the user may observe:
# Fences are effectively just timeline + integer tuples and are cheap to hold.\nwait_fence = my_timeline.at(t)\nsignal_fence = my_timeline.at(t+1)\n# Schedule work against the timeline.\n# All work prior to t must complete before execution can occur and after\n# execution the timeline will advance to t+1.\nasync_invoke(@some_fn, wait_fence, signal_fence)\n# The invocation may have returned immediately after the work was scheduled;\n# until the fence is reached no actual execution may have occurred. To\n# synchronize the user code with the timeline the user can block until the fence\n# is reached.\nsignal_fence.wait()\n
To the user this would appear as:
sequenceDiagram\n User->>@some_func: invoke\n activate @some_func\n @some_func->>User: ;\n @some_func-->>@some_func: wait t\n @some_func-->>User: signal t+1\n deactivate @some_func
This means from the user's perspective the actual operations performed by the invocation are not important: the only thing the user can observe in this situation is when the timeline reaches t+1
as they specified. Whether internally the invocation needs many steps to complete as there are timelines internal to the program is an implementation detail. Actual execution may look like this:
sequenceDiagram\n User->>@some_func: invoke\n activate @some_func\n @some_func->>User: ;\n @some_func->>@some_func: ;\n @some_func-->>Device A: ;\n Device A-->>Device A: wait t\n activate Device A\n @some_func->>@some_func: ;\n @some_func-->>Device B: ;\n activate Device B\n @some_func->>@some_func: ;\n Device A-->>@some_func: ;\n deactivate Device A\n @some_func->>@some_func: ;\n @some_func-->>Device B: ;\n activate Device B\n deactivate @some_func\n Device B-->>User: signal t+1\n deactivate Device B\n deactivate Device B
Even in this simple user-synchronous example the system is able to internally run several concurrent timelines with a minimal number of synchronization points and the lowest possible latency as the user is immediately notified without any intermediate layers needing to be woken, scheduled, executed, and passed on.
"},{"location":"developers/design-docs/invocation-execution-model/#pipelined-execution","title":"Pipelined Execution","text":"The true power of timelines comes from the ability to pipeline execution. Users define DAGs with fences and can construct arbitrarily complex execution topologies whether from the same program or across multiple programs:
stateDiagram\n direction LR\n state fence0 <<fork>>\n [*] --> fence0\n fence0 --> @fn0\n state fence1 <<fork>>\n @fn0 --> fence1\n fence1 --> @fn1\n fence1 --> @fn2\n state fence2 <<join>>\n @fn1 --> fence2\n @fn2 --> fence2\n @fn3 --> fence2\n fence0 --> @fn4\n @fn4 --> fence2\n fence2 --> [*]
This is a simple extension to the synchronous example using the same primitives:
# Timeline is defined by the user.\nfence_a = my_timeline.at(t)\nfence_b = my_timeline.at(t+1)\nfence_c = my_timeline.at(t+2)\n# Invocations are launched using the fences and may not complete immediately.\nasync_invoke(@fn0, fence_a, fence_b)\nasync_invoke(@fn1, fence_b, fence_c)\nasync_invoke(@fn2, fence_b, fence_c)\nasync_invoke(@fn3, None, fence_c)\nasync_invoke(@fn4, fence_a, fence_c)\n# Blocking here but no need to; could pass fence_c on to other invocations.\nfence_c.wait()\n
The critical point of this being that the user never had to wait for any particular invocation to complete before being able to schedule more work against the timeline, even if those invocations could themselves not complete synchronously. The lower layers of the system are able to fully model the execution as early as possible without needing to communicate (and importantly synchronize) with the user.
"},{"location":"developers/design-docs/invocation-execution-model/#io","title":"I/O","text":"Users define the semantics of their programs themselves. For example if the user knows the precise shape of an output buffer they can preallocate the buffer and pass it in. If they don't know they can decide to factor out the shape calculation and invoke that synchronously in order to compute the shape, allocate the appropriately sized buffer, and pass that in. Or they could decide to only deal with synchronous invocations and return a program-allocated buffer view with the appropriate shape in their callback. IREE does not dictate the design of user programs and as such enables mixed stateful/stateless, asynchronous/synchronous, and arbitrary scheduling models (enqueue/drain, windowing, etc).
Inputs and outputs to invocations are provided by the user as primitive values (integers, floats, etc), supported builtin types (lists, byte buffers/strings), custom user types, and HAL types like buffers or buffer views (buffers + shape and type metadata). One or more wait fences can be used to order invocation access to one or more inputs by indicating that the resource is not available until a certain fence is reached. Similarly one or more signal fences can be used to order subsequent access to the resources by indicating the advancement of the timeline when they are available.
# wait_fence_a must be reached before buffer_a and buffer_b can be read.\n# wait_fence_b must be reached before buffer_c can be read.\n# buffer_a will be ready to read when signal_fence_a has been reached.\nasync_invoke(@fn,\n (wait_fence_a, buffer_a, buffer_b),\n 42, # no ordering required on value types\n (wait_fence_b, buffer_c),\n (signal_fence_a, buffer_a))\n
The above example demonstrates an in-place operation on buffer_a
. It's also possible for invocations to return values:
result = invoke(@sum, 1, 2) # = 3\n
When executed asynchronously a callback or any construct that can be built upon them (like promises/futures) can receive the results:
def my_callback(result):\n print(result) # 3\nasync_invoke(@sum, 1, 2, my_callback)\n
"},{"location":"developers/design-docs/invocation-execution-model/#stream-ordered-allocations","title":"Stream-ordered Allocations","text":"Invocations generally have only a few KB of overhead and pipelined command buffers take only a small amount more. Storage buffers, however, can easily take hundreds of MB per invocation for I/O and transient state. This compounds as program usage becomes more complex or multiple programs are involved. IREE supports traditional host-ordered allocations (\u00e0 la malloc/free) for persistent buffers like large constants/read-only data or user-managed ringbuffers. Stream-ordered allocations are also supported to allow for pooled buffer reservations that can be allocated in a scheduled order alongside program execution.
For more detailed examples see the CUDA blog posts describing their implementation: part 1, part 2.
With stream-ordered allocations each allocation and deallocation operation is scheduled with wait and signal fences just as with invocations. This allows these allocation operations to execute remotely on device without host program involvement. For example, scheduling alloca0
/dealloca0
and alloca1
/dealloca1
interleaved with the function execution allows for the transient memory required for executing @fn0
to remain uncommitted until immediately before it is executed, committed during execution, and then decommitted immediately after execution. The memory required for passing data from @fn0
to the subsequent @fn1
and @fn2
survives until after they have completed executing before being decommitted. By using the same scheduling primitives as execution the allocation topology can be as arbitrarily complex as the invocation topology:
stateDiagram\n direction LR\n state fence0a <<fork>>\n [*] --> fence0a\n state fence0b <<fork>>\n fence0a --> alloca0\n fence0a --> alloca1\n alloca0 --> fence0b\n alloca1 --> fence0b\n fence0b --> @fn0\n state fence1a <<fork>>\n @fn0 --> fence1a\n state fence1b <<fork>>\n fence1a --> dealloc0\n dealloc0 --> fence1b\n fence1b --> @fn1\n fence1b --> @fn2\n state fence2a <<join>>\n @fn1 --> fence2a\n @fn2 --> fence2a\n state fence2b\n fence2a --> dealloc1\n state fence2b <<join>>\n dealloc1 --> fence2b\n fence2b --> [*]
When operating in this way allocations from the host-perspective are just reservations for a slice of pooled storage that will be committed at some point in the future. Likewise deallocations from the host-perspective release the prior reservation and schedule the paired decommit at some point in the future. Scheduling N sequential invocations thus requires only enough committed storage for a single invocation in addition to the I/O (unless that too is stream-ordered).
This scheduling behavior allows for both minimal peak memory consumption regardless of the number of programs or invocation pipeline depth and sharing of committed storage across programs: the memory consumption of a program at rest is near zero when stateless and the sum of all state when stateful. Target devices that natively support stream-ordered allocations (like CUDA) can even share pools across processes.
The other provided feature in combination with the fence guaranteed forward progress is that so long as the memory pool can service a single request execution can still continue even when constrained. A device can serialize two independent invocations requiring 400MB of transient memory when the system only has 512MB available with no user-visible impact besides increased latency. This does require the user to ensure they schedule work that is possible to run or rely on the target system having paging in order to lighten the strictness of the pool quotas.
Stream-ordered allocations performed by the user for invocation inputs can be declared as transferred to the program. This allows the program to eagerly deallocate or reuse the input storage while still preserving the internal scheduling requirements of the program.
"},{"location":"developers/design-docs/invocation-execution-model/#internal-state","title":"Internal State","text":"A stateful program may contain internal timelines that it uses to order its own execution. Take for example this simple stateful program:
class TrivialKernel(Program):\n _x0 = Program.export_global(x_type)\n def get(self):\n return self._x0\n def set(self, x=x_type):\n self._x0 = x\n def matmul(self, x=y_type):\n self._x0 = self._matmul(x, self._x0)\n @Program.kernel\n def _matmul(x, x0):\n return jnp.matmul(x, x0)\n
Each invocation of matmul
needs to be executed in-order with prior invocations as there is a data dependency established on self._x0
. Attempts to get
or set
must also be sequenced correctly with the matmul
invocations. A basic usage like this:
m = TrivialKernel()\nm.set(input)\nm.matmul(a)\nm.matmul(b)\nm.matmul(c)\noutput = m.get()\nprint(output) # implicit wait\n
Would be executed as:
sequenceDiagram\n activate User\n User->>TrivialKernel: @set(input)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n activate Device\n TrivialKernel->>User: ;\n User->>TrivialKernel: @matmul(a)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n User->>TrivialKernel: @matmul(b)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n User->>TrivialKernel: @matmul(c)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n User->>TrivialKernel: @get()\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n Device-->>Device: ;\n deactivate User\n User->>User: (wait)\n Device-->>User: (signal)\n deactivate Device\n activate User\n User->>User: print(output)\n deactivate User
Note that although the user provided no timeline of their own, execution is still ordered correctly due to the internal timeline constructed by the program. If the user wanted to also pipeline execution with another program they could do so by providing their own fences.
"},{"location":"developers/general/contributing/","title":"Contributing to IREE","text":"We'd love to accept your patches and contributions to this project.
Please file issues or reach out on any of our other communication channels before doing substantial work; this will ensure that others don't duplicate the work and that there's a chance to discuss any design issues.
"},{"location":"developers/general/contributing/#developer-policies","title":"Developer policies","text":""},{"location":"developers/general/contributing/#code-of-conduct","title":"Code of conduct","text":"This project follows the OpenXLA Code of Conduct.
"},{"location":"developers/general/contributing/#contributor-license-agreement","title":"Contributor License Agreement","text":"Contributions to this project must be accompanied by a Contributor License Agreement (CLA). Head over to https://cla.developers.google.com/ to see your current agreements on file or to sign a new one.
- You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project.
- You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again.
"},{"location":"developers/general/contributing/#coding-style-guidelines","title":"Coding style guidelines","text":"Most of the code style is derived from the Google Style Guides for the appropriate language and is generally not something we accept changes on (as clang-format and other linters set that for us). The C++ compiler portion of the project follows the MLIR/LLVM style guide.
Improvements to code structure and clarity are welcome but please file issues to track such work first. Pure style changes are unlikely to be accepted unless they are applied consistently across the project.
Tip - code formatters and lint scripts Formatters like clang-format
(C/C++) and Black (Python) can be set to run automatically in your editor of choice.
The script at build_tools/scripts/lint.sh
can also be used to run the full suite of lint checks.
"},{"location":"developers/general/contributing/#code-reviews","title":"Code reviews","text":"All submissions, including submissions by maintainers, require review. We use GitHub pull requests (PRs) for this purpose. Consult GitHub Help for more information on using pull requests.
- Please keep PRs small (focused on a single issue) to make reviews and later culprit-finding easier.
- You may see trusted core contributors bending this rule for project maintenance and major subsystem renovation. If you feel like the rules aren't working for a certain situation, please ask as we bias towards pragmatism for cases that require it.
"},{"location":"developers/general/contributing/#github-actions-workflows","title":"GitHub Actions workflows","text":"We use GitHub Actions to automatically build and test various parts of the project.
- Most presubmit workflows will only run automatically on PRs if you are a project collaborator. Otherwise a maintainer must approve workflow runs. If you are sending code changes to the project, please ask to be added as a collaborator, so that these can run automatically.
- It is generally expected that PRs will only be merged when all checks are passing. In some cases, pre-existing failures may be bypassed by a maintainer.
Tip - adjusting workflow behavior Some workflows only run on commits after they are merged. See the CI behavior manipulation section below to learn how to customize this behavior.
"},{"location":"developers/general/contributing/#merging-approved-changes","title":"Merging approved changes","text":"After review and presubmit checks, PRs should typically be merged using \"squash and merge\".
- The squashed commit summary should match the PR title and the commit description should match the PR body (this is the default behavior). Accordingly, please write these as you would a helpful commit message.
It is assumed that the PR author will merge their change unless they ask someone else to merge it for them (e.g. because they don't have write access yet).
"},{"location":"developers/general/contributing/#obtaining-commit-access","title":"Obtaining commit access","text":"Access to affiliated repositories is divided into three tiers:
Tier Description Team link Triage New project members should typically start here Can be assigned issues Can apply labels to issues / PRs Can run workflows without approval iree-triage Write Established project contributors should request this access Can merge approved pull requests Can create branches iree-write Maintain Can edit repository settings Can push to protected branches iree-maintain All access tiers first require joining the OpenXLA GitHub organization.
Fill out this form to request access
Once you are a member of the OpenXLA GitHub organization, you can request to join any of the teams on https://github.com/orgs/openxla/teams.
Note: other GitHub organizations
Work on IREE sometimes spans other GitHub organizations like iree-org and shark-infra. Reach out to a project member if you would also like access to repositories in those organizations.
"},{"location":"developers/general/contributing/#credits-in-the-authors-file","title":"Credits in the AUTHORS file","text":"If you would like additional recognition for your contributions, you may add yourself or your organization to the AUTHORS file that keeps track of those who have made significant contributions to the project.
- Please add the entity who owns the copyright for your contribution.
- The source control history remains the most accurate source for individual contributions.
"},{"location":"developers/general/contributing/#tips-for-contributors","title":"Tips for contributors","text":""},{"location":"developers/general/contributing/#tool-recommendations","title":"Tool recommendations","text":"Program or tool Description Visual Studio Code (VSCode) The most commonly used editor amongst IREE developers Ccache A fast C/C++ compiler cache. See the CMake with ccache
page GitHub CLI A CLI for interacting with GitHub \"Refined GitHub\" browser extensions Extension that adds features to the GitHub UI"},{"location":"developers/general/contributing/#build-systems","title":"Build systems","text":"IREE supports building from source with both Bazel and CMake.
- CMake is the preferred build system and offers the most flexible configuration options
- Bazel is a stricter build system and helps with usage in Google's downstream source repository
- Certain dependencies (think large/complex projects like CUDA, TensorFlow, PyTorch, etc.) may be difficult to support with one build system or the other, so the project may configure these as optional
"},{"location":"developers/general/contributing/#continuous-integration-ci","title":"Continuous integration (CI)","text":"IREE uses GitHub Actions for CI. The primary CI is configured in the ci.yml workflow file.
"},{"location":"developers/general/contributing/#self-hosted-runners","title":"Self-hosted runners","text":"In addition to the default runners GitHub provides, IREE uses self-hosted runners to run many of its workflow jobs. These enable access to additional compute and custom configurations such as accelerators. Configuration scripting is checked in to this repository (see the README for that directory).
"},{"location":"developers/general/contributing/#custom-managed-runners","title":"Custom managed runners","text":"In addition to our self-hosted runners, we use GitHub's large managed runners for some platforms.
"},{"location":"developers/general/contributing/#ci-behavior-manipulation","title":"CI behavior manipulation","text":"The setup step of the CI determines which CI jobs to run. This is controlled by the configure_ci.py script. It will generally run a pre-determined set of jobs on presubmit with some jobs kept as post-submit only. If changes are only to a certain set of excluded files that we know don't affect CI (e.g. Markdown files), then it will skip the jobs.
You can customize which jobs run using git trailers in the PR description.
The available options are
ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\nskip-ci: free form reason\nskip-llvm-integrate-benchmark: free form reason\nbenchmark-extra: extra,benchmarks,to,run\nrunner-env: [testing|prod]\n
Using skip-ci
skip-ci
skips all jobs. It is mutually exclusive with the other ci-*
options and is synonymous with ci-skip: all
.
skip-ci: free form reason\n
Using ci-skip
, ci-extra
, ci-exactly
The ci-*
options instruct the setup script on which jobs to include or exclude from its run. They take a comma-separated list of jobs which must be from the set of top-level job identifiers in the ci.yml
file or the special keyword \"all\" to indicate all jobs.
ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\n
ci-skip
removes jobs that would otherwise be included, though it is not an error to list jobs that would not be included by default. ci-extra
adds additional jobs that would not have otherwise been run, though it is not an error to list jobs that would have been included anyway. It is an error to list a job in both \"skip\" and \"extra\". ci-exactly
provides an exact list of jobs that should run. It is mutually exclusive with both \"skip\" and \"extra\".
In all these cases, the setup does not make any effort to ensure that job dependencies are satisfied. Thus, if you request skipping the build_all
job, all the jobs that depend on it will fail, not be skipped.
Using benchmark-extra
, skip-llvm-integrate-benchmark
benchmark-extra: extra,benchmarks,to,run\nskip-llvm-integrate-benchmark: free form reason\n
Benchmarks don't run by default on PRs, and must be specifically requested.
The benchmark-extra
option allows specifying additional benchmark presets to run as part of benchmarking. It accepts a comma-separated list of benchmark presets. This combines with labels added to the PR (which are a more limited set of options). See the benchmark suites documentation.
Benchmarks do run by default on PRs detected to be an integration of LLVM into IREE, but this behavior can be disabled with skip-llvm-integrate-benchmark
.
Using runner-env
The runner-env
option controls which runner environment to target for our self-hosted runners. We maintain a test environment to allow testing out new configurations prior to rolling them out. This trailer is for advanced users who are working on the CI infrastructure itself.
runner-env: [testing|prod]\n
"},{"location":"developers/general/contributing/#ci-configuration-recipes","title":"CI configuration recipes","text":"Copy/paste any of these at the bottom of a PR description to change what the CI runs.
-
Also run Windows and macOS builds that are normally post-merge only:
ci-extra: build_test_all_windows,build_test_all_macos_arm64,build_test_all_macos_x86_64\n
-
Also run GPU tests on NVIDIA A100 runners (opt-in due to low availability):
ci-extra: test_a100\n
-
Skip all CI builds and tests, e.g. for comment-only changes:
skip-ci: Comment-only change.\n
-
Only run Bazel builds, e.g. for changes only affecting Bazel rules:
ci-exactly: build_test_all_bazel\n
For example, this PR opted in to running the build_test_all_windows
job:
The enabled jobs can be viewed from the Summary page of an action run:
"},{"location":"developers/general/contributing/#git-workflows","title":"Git workflows","text":"We tend to use the \"triangular\" or \"forking\" workflow. Develop primarily on a clone of the repository on your development machine. Any local branches named the same as persistent branches from the main repository are pristine (though potentially stale) copies. You only fastforward these to match upstream and otherwise do development on other branches. When sending PRs, you push to a different branch on your public fork and create the PR from there.
"},{"location":"developers/general/contributing/#setup","title":"Setup","text":" -
Create a fork of the main repository.
-
Create a local git repository with remotes upstream
(the main repository) and origin
(your personal fork). To list your current remotes git remote -v
.
a. If you already cloned from the main repository (e.g. by following the getting started guide):
# From your existing git repo\n$ git remote rename origin upstream\n$ git remote add origin https://github.com/<github_username>/iree.git\n
b. If you haven't already cloned:
# From whatever directory under which you want to nest your repo\n$ git clone https://github.com/<github_username>/iree.git\n$ cd iree\n$ git remote add upstream https://github.com/openxla/iree.git\n
This is especially important for maintainers who have write access (so can push directly to the main repository) and admins who have elevated privileges (so can push directly to protected branches).
These names are just suggestions, but you might find some scripts where the defaults are for remotes named like this.
For extra safety, you can make it difficult to push directly to upstream by setting the push url to something invalid: git remote set-url --push upstream DISABLE
, which requires re-enabling the push URL explicitly before pushing. You can wrap this behavior in a custom git command like git-sudo.
-
Use a script like git_update.sh to easily synchronize main
with upstream
. Submodules make this is a little trickier than it should be. You can also turn this into a git command by adding it to your path as git-update
.
"},{"location":"developers/general/contributing/#git-config","title":"Git config","text":"These are some additional options you could put in your top-level .gitconfig
or repository-specific .git/config
files that are conducive to the recommended workflow
[push]\ndefault = current\n[alias]\n# Delete branches that you pushed and have been deleted upstream, e.g. because\n# the PR was merged.\ngone = ! \"git fetch -p && git for-each-ref --format '%(refname:short) %(upstream:track)' | awk '$2 == \\\"[gone]\\\" {print $1}' | xargs -r git branch -D\"\n# Update from upstream (custom command) and delete obsolete local branches.\nsync = ! (git update main && git gone)\n# Create a new branch based off of main (requires a clean working directory).\nnew = \"!f(){ \\\\\\ngit checkout main && git switch -c $1; \\\\\\n}; f\"\n# Display branches in a useful \"latest last\" format\nbr = for-each-ref --sort=committerdate refs/heads/ --format='%(HEAD) %(color:yellow)%(refname:short)%(color:reset) - %(color:red)%(objectname:short)%(color:reset) - %(contents:subject) (%(color:green)%(committerdate:relative)%(color:reset))'\n# `git git foo` -> `git foo` typo fixer\ngit = \"!f(){ \\\\\\n git \\\"$@\\\"; \\\\\\n}; f\"\n# Get the git root directory\nroot = rev-parse --show-toplevel\n# checkout, but also sync submodules\nch = \"!f() { \\\\\\n git checkout \\\"$@\\\"; git submodule sync && git submodule update --init; \\\\\\n}; f\"\n# See the diff for a PR branch vs the main branch\ndiffmain = diff --merge-base main\n# See only the files that differ vs the main branch\nwhatsout = diffmain --name-only\n[checkout]\n# If the checkout command\ndefaultRemote = origin\n[pull]\n# When pulling, only complete the pull if its a clean fast forward.\nff = only\n[remote]\n# Push to your fork (origin) by default\npushDefault = origin\n[url \"ssh://git@github.com/\"]\n# Pull with https (so no auth required), but push with ssh.\npushInsteadOf = https://github.com/\n
"},{"location":"developers/general/developer-overview/","title":"Developer overview","text":"This guide provides an overview of IREE's project structure and main tools for developers.
"},{"location":"developers/general/developer-overview/#project-code-layout","title":"Project code layout","text":" - /compiler/: MLIR dialects, LLVM compiler passes, module translation code, etc.
- bindings/: Python and other language bindings
- /runtime/: Standalone runtime code including the VM and HAL drivers
- bindings/: Python and other language bindings
- /integrations/: Integrations between IREE and other frameworks, such as TensorFlow
- /tests/: Tests for full compiler->runtime workflows
- /tools/: Developer tools (
iree-compile
, iree-run-module
, etc.) - /samples/: Also see the separate https://github.com/iree-org/iree-samples repository
"},{"location":"developers/general/developer-overview/#iree-compiler-code-layout","title":"IREE compiler code layout","text":" - API/: Public C API
- Codegen/: Code generation for compute kernels
- Dialect/: MLIR dialects (
Flow
, HAL
, Stream
, VM
, etc.) - InputConversion/: Conversions from input dialects and preprocessing
"},{"location":"developers/general/developer-overview/#iree-runtime-code-layout","title":"IREE runtime code layout","text":" - base/: Common types and utilities used throughout the runtime
- hal/: Hardware Abstraction Layer for IREE's runtime, with implementations for hardware and software backends
- schemas/: Data storage format definitions, primarily using FlatBuffers
- task/: System for running tasks across multiple CPU threads
- tooling/: Utilities for tests and developer tools, not suitable for use as-is in downstream applications
- vm/: Bytecode Virtual Machine used to work with IREE modules and invoke IREE functions
"},{"location":"developers/general/developer-overview/#developer-tools","title":"Developer tools","text":"IREE's core compiler accepts programs in supported input MLIR dialects (e.g. stablehlo
, tosa
, linalg
). Import tools and APIs may be used to convert from framework-specific formats like TensorFlow SavedModel to MLIR modules. While programs are ultimately compiled down to modules suitable for running on some combination of IREE's target deployment platforms, IREE's developer tools can run individual compiler passes, translations, and other transformations step by step.
"},{"location":"developers/general/developer-overview/#iree-opt","title":"iree-opt","text":"iree-opt
is a tool for testing IREE's compiler passes. It is similar to mlir-opt and runs sets of IREE's compiler passes on .mlir
input files. See \"conversion\" in MLIR's Glossary for more information. Transformations performed by iree-opt
can range from individual passes performing isolated manipulations to broad pipelines that encompass a sequence of steps.
Test .mlir
files that are checked in typically include a RUN
block at the top of the file that specifies which passes should be performed and if FileCheck
should be used to test the generated output.
Here's an example of a small compiler pass running on a test file:
$ ../iree-build/tools/iree-opt \\\n--split-input-file \\\n--mlir-print-ir-before-all \\\n--iree-util-drop-compiler-hints \\\n$PWD/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir\n
For a more complex example, here's how to run IREE's complete transformation pipeline targeting the VMVX backend on the fullyconnected.mlir model file:
$ ../iree-build/tools/iree-opt \\\n--iree-transformation-pipeline \\\n--iree-hal-target-backends=vmvx \\\n$PWD/tests/e2e/stablehlo_models/fullyconnected.mlir\n
"},{"location":"developers/general/developer-overview/#iree-compile","title":"iree-compile","text":"iree-compile
is IREE's main compiler driver for generating binaries from supported input MLIR assembly.
For example, to translate simple.mlir
to an IREE module:
$ ../iree-build/tools/iree-compile \\\n--iree-hal-target-backends=vmvx \\\n$PWD/samples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
"},{"location":"developers/general/developer-overview/#iree-run-module","title":"iree-run-module","text":"The iree-run-module
program takes an already translated IREE module as input and executes an exported main function using the provided inputs.
This program can be used in sequence with iree-compile
to translate a .mlir
file to an IREE module and then execute it. Here is an example command that executes the simple simple_abs_vmvx.vmfb
compiled from simple_abs.mlir
above on IREE's VMVX driver:
$ ../iree-build/tools/iree-run-module \\\n--module=/tmp/simple_abs_vmvx.vmfb \\\n--device=local-task \\\n--function=abs \\\n--input=f32=-2\n
"},{"location":"developers/general/developer-overview/#iree-check-module","title":"iree-check-module","text":"The iree-check-module
program takes an already translated IREE module as input and executes it as a series of googletest tests. This is the test runner for the IREE check framework.
$ ../iree-build/tools/iree-compile \\\n--iree-input-type=stablehlo \\\n--iree-hal-target-backends=vmvx \\\n$PWD/tests/e2e/xla_ops/abs.mlir \\\n-o /tmp/abs.vmfb\n
$ ../iree-build/tools/iree-check-module \\\n--device=local-task \\\n--module=/tmp/abs.vmfb\n
"},{"location":"developers/general/developer-overview/#iree-run-mlir","title":"iree-run-mlir","text":"The iree-run-mlir
program takes a .mlir
file as input, translates it to an IREE bytecode module, and executes the module.
It is designed for testing and debugging, not production uses, and therefore does some additional work that usually must be explicit, like marking every function as exported by default and running all of them.
For example, to execute the contents of samples/models/simple_abs.mlir:
# iree-run-mlir <compiler flags> [input.mlir] <runtime flags>\n$ ../iree-build/tools/iree-run-mlir \\\n--iree-hal-target-backends=vmvx \\\n$PWD/samples/models/simple_abs.mlir \\\n--input=f32=-2\n
"},{"location":"developers/general/developer-overview/#iree-dump-module","title":"iree-dump-module","text":"The iree-dump-module
program prints the contents of an IREE module FlatBuffer file.
For example, to inspect the module translated above:
../iree-build/tools/iree-dump-module /tmp/simple_abs_vmvx.vmfb\n
"},{"location":"developers/general/developer-overview/#useful-generic-flags","title":"Useful generic flags","text":""},{"location":"developers/general/developer-overview/#read-inputs-from-a-file","title":"Read inputs from a file","text":"All the IREE tools support reading input values from a file. This is quite useful for debugging. Use --help
for each tool to see which flag to set. The inputs are expected to be newline-separated. Each input should be either a scalar or a buffer. Scalars should be in the format type=value
and buffers should be in the format [shape]xtype=[value]
. For example:
1x5xf32=1,-2,-3,4,-5\n1x5x3x1xf32=15,14,13,12,11,10,9,8,7,6,5,4,3,2,1\n
"},{"location":"developers/general/developer-overview/#-iree-flow-trace-dispatch-tensors","title":"--iree-flow-trace-dispatch-tensors
","text":"This flag will enable tracing inputs and outputs for each dispatch function. It is easier to narrow down test cases, since IREE breaks an ML workload into multiple dispatch functions. When the flag is on, IREE will insert trace points before and after each dispatch function. The first trace op is for inputs, and the second trace op is for outputs. There will be two events for one dispatch function.
"},{"location":"developers/general/developer-tips/","title":"Developer tips and tricks","text":"The IREE compiler is built using MLIR, so it naturally supports the common MLIR debugging workflows. For areas where IREE differentiates itself, this page lists other helpful tips and tricks.
"},{"location":"developers/general/developer-tips/#setting-compiler-options","title":"Setting compiler options","text":"Tools such as iree-compile
take options via command-line flags. Pass --help
to see the full list:
$ iree-compile --help\n\nOVERVIEW: IREE compilation driver\n\nUSAGE: iree-compile [options] <input file or '-' for stdin>\n\nOPTIONS:\n ...\n
Tip - Options and the Python bindings
If you are using the Python bindings, options can be passed via the extra_args=[\"--flag\"]
argument:
import iree.compiler as ireec\n\ninput_mlir = \"\"\"\nfunc.func @abs(%input : tensor<f32>) -> (tensor<f32>) {\n%result = math.absf %input : tensor<f32>\n return %result : tensor<f32>\n}\"\"\"\n\ncompiled_module = ireec.tools.compile_str(\n input_mlir,\n target_backends=[\"llvm-cpu\"],\nextra_args=[\"--mlir-timing\"])\n
"},{"location":"developers/general/developer-tips/#inspecting-vmfb-files","title":"Inspecting .vmfb
files","text":"The IREE compiler generates FlatBuffer files using the .vmfb
file extension, short for \"Virtual Machine FlatBuffer\", which can then be loaded and executed using IREE's runtime.
Info - other output formats The IREE compiler can output different formats with the --output-format=
flag:
Flag value Output --output-format=vm-bytecode
(default) VM Bytecode (.vmfb
) files --output-format=vm-c
C source modules VM Bytecode files are usable across a range of deployment scenarios, while C source modules provide low level connection points for constrained environments like bare metal platforms.
By default, .vmfb
files can be opened as zip files: (1)
- Setting
--iree-vm-emit-polyglot-zip=false
will disable this feature and decrease file size slightly
$ unzip -d simple_abs_cpu ./simple_abs_cpu.vmfb\n\nArchive: ./simple_abs_cpu.vmfb\n extracting: simple_abs_cpu/module.fb\n extracting: simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n
The embedded binary (here an ELF shared object with CPU code) can be parsed by standard tools:
$ readelf -Ws ./simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n\nSymbol table '.dynsym' contains 2 entries:\n Num: Value Size Type Bind Vis Ndx Name\n 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND\n 1: 0000000000001760 17 FUNC GLOBAL DEFAULT 7 iree_hal_executable_library_query\n\nSymbol table '.symtab' contains 42 entries:\n Num: Value Size Type Bind Vis Ndx Name\n 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND\n 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS abs_dispatch_0\n 2: 0000000000001730 34 FUNC LOCAL DEFAULT 7 abs_dispatch_0_generic\n 3: 00000000000034c0 80 OBJECT LOCAL DEFAULT 8 iree_hal_executable_library_query_v0\n 4: 0000000000001780 111 FUNC LOCAL DEFAULT 7 iree_h2f_ieee\n 5: 00000000000017f0 207 FUNC LOCAL DEFAULT 7 iree_f2h_ieee\n ...\n
The iree-dump-module
tool can also be used to see information about a given .vmfb
file:
$ iree-dump-module simple_abs.vmfb\n\n//===---------------------------------------------------------------------===//\n// @module : version 0\n//===---------------------------------------------------------------------===//\n\nRequired Types:\n [ 0] i32\n [ 1] i64\n [ 2] !hal.allocator\n [ 3] !hal.buffer\n ...\n\nModule Dependencies:\n hal, version >= 0, required\n\nImported Functions:\n [ 0] hal.ex.shared_device() -> (!vm.ref<?>)\n [ 1] hal.allocator.allocate(!vm.ref<?>, i32, i32, i64) -> (!vm.ref<?>)\n ...\n\nExported Functions:\n [ 0] abs(!vm.ref<?>) -> (!vm.ref<?>)\n [ 1] __init() -> ()\n\n...\n
"},{"location":"developers/general/developer-tips/#dumping-executable-files","title":"Dumping executable files","text":"The --iree-hal-dump-executable-*
flags instruct the compiler to save files related to \"executable translation\" (code generation for a specific hardware target) into a directory of your choosing. If you are interested in seeing which operations in your input program were fused into a compute kernel or what device code was generated for a given program structure, these flags are a great starting point.
Flag Files dumped iree-hal-dump-executable-files-to
All files (meta-flag) iree-hal-dump-executable-sources-to
Source .mlir
files prior to HAL compilation iree-hal-dump-executable-intermediates-to
Intermediate files (e.g. .o
files, .mlir
stages) iree-hal-dump-executable-binaries-to
Binary files (e.g. .so
, .spv
, .ptx
), as used in the .vmfb
iree-hal-dump-executable-benchmarks-to
Standalone benchmark files for iree-benchmark-module
CPUGPU - VulkanGPU - CUDA $ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-llvmcpu-link-embedded=false \\\n--iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n-o /tmp/iree/simple_abs/simple_abs_cpu.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_system_elf_x86_64_benchmark.mlir\nmodule_abs_dispatch_0_system_elf_x86_64.codegen.bc\nmodule_abs_dispatch_0_system_elf_x86_64.linked.bc\nmodule_abs_dispatch_0_system_elf_x86_64.optimized.bc\nmodule_abs_dispatch_0_system_elf_x86_64.o\nmodule_abs_dispatch_0_system_elf_x86_64.s\nmodule_abs_dispatch_0_system_elf_x86_64.so\nsimple_abs_cpu.vmfb\n
Tip - Embedded and system linking
The default value of --iree-llvmcpu-link-embedded=true
generates embedded ELF files. By disabling that flag, the compiler will produce platform-standard .so
files for Linux, .dll
files for Windows, etc. While embedded ELF files can be smaller and more portable, inspection of artifacts is easier with platform-standard shared object files.
Tip - Disassembling .bc
files with llvm-dis
The .bc
intermediate files use the LLVM BitCode format, which can be disassembled using llvm-dis
:
// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_system_elf_x86_64.codegen.bc\n$ cat module_abs_dispatch_0_system_elf_x86_64.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_system_elf_x86_64.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\ntarget triple = \"x86_64-linux-gnu\"\n\n%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 }\n%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 }\n\n...\n\ndefine internal i32 @abs_dispatch_0_generic(\n ptr noalias nonnull align 16 %0,\n ptr noalias nonnull align 16 %1,\n ptr noalias nonnull align 16 %2) #0 {\n %4 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t %4, 10,\n %6 = load ptr, ptr %5, align 8,\n %7 = ptrtoint ptr %6 to i64,\n %8 = and i64 %7, 63,\n %9 = icmp eq i64 %8, 0,\n call void @llvm.assume(i1 %9),\n %10 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n %11 = extractvalue %iree_hal_executable_dispatch_state_v0_t %10, 10,\n %12 = getelementptr ptr, ptr %11, i32 1,\n %13 = load ptr, ptr %12, align 8,\n %14 = ptrtoint ptr %13 to i64,\n %15 = and i64 %14, 63,\n %16 = icmp eq i64 %15, 0,\n call void @llvm.assume(i1 %16),\n %17 = load float, ptr %6, align 4,\n %18 = call float @llvm.fabs.f32(float %17),\n store float %18, ptr %13, align 4,\n ret i32 0,\n}\n\n...\n
$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n--iree-hal-target-backends=vulkan-spirv \\\n--iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n-o /tmp/iree/simple_abs/simple_abs_vulkan.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb_benchmark.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.spv\nsimple_abs_vulkan.vmfb\n
Tip - Disassembling .spv
files with spirv-dis
The .spv
files use the SPIR-V binary format, which can be disassembled using spirv-dis
from SPIR-V Tools:
$ cd /tmp/iree/simple_abs/\n$ spirv-dis module_abs_dispatch_0_vulkan_spirv_fb.spv\n\n; SPIR-V\n; Version: 1.0\n; Generator: Khronos; 22\n; Bound: 20\n; Schema: 0\n OpCapability Shader\n OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n %18 = OpExtInstImport \"GLSL.std.450\"\n OpMemoryModel Logical GLSL450\n OpEntryPoint GLCompute %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n OpExecutionMode %abs_dispatch_0_generic LocalSize 1 1 1\n OpName %__resource_var_0_0_ \"__resource_var_0_0_\"\n OpName %__resource_var_0_1_ \"__resource_var_0_1_\"\n OpName %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n OpDecorate %_arr_float_uint_1 ArrayStride 4\n OpMemberDecorate %_struct_2 0 Offset 0\n OpDecorate %_struct_2 Block\n OpDecorate %__resource_var_0_0_ Binding 0\n OpDecorate %__resource_var_0_0_ DescriptorSet 0\n OpDecorate %__resource_var_0_1_ Binding 1\n OpDecorate %__resource_var_0_1_ DescriptorSet 0\n %float = OpTypeFloat 32\n %uint = OpTypeInt 32 0\n %uint_1 = OpConstant %uint 1\n%_arr_float_uint_1 = OpTypeArray %float %uint_1\n %_struct_2 = OpTypeStruct %_arr_float_uint_1\n%_ptr_StorageBuffer__struct_2 = OpTypePointer StorageBuffer %_struct_2\n%__resource_var_0_0_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n%__resource_var_0_1_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n %void = OpTypeVoid\n %9 = OpTypeFunction %void\n %uint_0 = OpConstant %uint 0\n%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float\n%abs_dispatch_0_generic = OpFunction %void None %9\n %12 = OpLabel\n %15 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_0_ %uint_0 %uint_0\n %16 = OpLoad %float %15\n %17 = OpExtInst %float %18 FAbs %16\n %19 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_1_ %uint_0 %uint_0\n OpStore %19 %17\n OpReturn\n OpFunctionEnd\n
$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n--iree-hal-target-backends=cuda \\\n--iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n-o /tmp/iree/simple_abs/simple_abs_cuda.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0_cuda_nvptx_fb_benchmark.mlir\nmodule_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.linked.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.optimized.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.ptx\nmodule_abs_dispatch_0.mlir\nsimple_abs_cuda.vmfb\n
Tip - Disassembling .bc
files with llvm-dis
The .bc
intermediate files use the LLVM BitCode format, which can be disassembled using llvm-dis
:
// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\n$ cat module_abs_dispatch_0_cuda_nvptx_fb.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\n\ndeclare ptr @malloc(i64)\n\ndeclare void @free(ptr)\n\ndeclare float @__nv_fabsf(float)\n\ndefine void @abs_dispatch_0_generic(ptr noalias readonly align 16 %0, ptr noalias align 16 %1) {\n %3 = ptrtoint ptr %0 to i64\n %4 = and i64 %3, 63\n %5 = icmp eq i64 %4, 0\n call void @llvm.assume(i1 %5)\n %6 = ptrtoint ptr %1 to i64\n %7 = and i64 %6, 63\n %8 = icmp eq i64 %7, 0\n call void @llvm.assume(i1 %8)\n %9 = load float, ptr %0, align 4\n %10 = call float @__nv_fabsf(float %9)\n store float %10, ptr %1, align 4\n ret void\n}\n\n!nvvm.annotations = !{!0, !1, !2, !3}\n\n!0 = !{ptr @abs_dispatch_0_generic, !\"kernel\", i32 1}\n!1 = !{ptr @abs_dispatch_0_generic, !\"maxntidx\", i32 1}\n!2 = !{ptr @abs_dispatch_0_generic, !\"maxntidy\", i32 1}\n!3 = !{ptr @abs_dispatch_0_generic, !\"maxntidz\", i32 1}\n
"},{"location":"developers/general/developer-tips/#compiling-phase-by-phase","title":"Compiling phase by phase","text":"IREE compiles programs through a series of broad phases:
graph LR\n accTitle: Compilation phases overview\n accDescr: Input to ABI to Flow to Stream to HAL to VM\n\n A([Input])\n A --> B([ABI])\n B --> C([Flow])\n C --> D([Stream])\n D --> E([HAL])\n E --> F([VM])
Tip - available phases These are the phase names available for use with the --compile-to
and --compile-from
flags described below:
Phase name Description input
Performs input processing and lowering into core IREE input dialects (linalg/etc) abi
Adjusts the program ABI for the specified execution environment preprocessing
Applies customizable preprocessing
prior to FLow/Stream/HAL/VM flow
Models execution data flow and partitioning using the flow
dialect stream
Models execution partitioning and scheduling using the stream
dialect executable-sources
Prepares hal
dialect executables for translation, prior to codegen executable-targets
Runs code generation for hal
dialect executables hal
Finishes hal
dialect processing vm
Lowers to IREE's abstract virtual machine using the vm
dialect end
Completes the full compilation pipeline For an accurate list of phases, see the source code or check the help output with a command such as:
iree-compile --help | sed -n '/--compile-to/,/--/p' | head -n -1\n
You can output a program snapshot at intermediate phases with the --compile-to=<phase name>
flag:
$ cat simple_abs.mlir\n\nfunc.func @abs(%input : tensor<f32>) -> (tensor<f32>) {\n %result = math.absf %input : tensor<f32>\n return %result : tensor<f32>\n}\n\n$ iree-compile simple_abs.mlir --compile-to=abi\n\nmodule {\n func.func @abs(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {\n %0 = hal.tensor.import %arg0 \"input 0\" : !hal.buffer_view -> tensor<f32>\n %1 = math.absf %0 : tensor<f32>\n %2 = hal.tensor.export %1 \"output 0\" : tensor<f32> -> !hal.buffer_view\n return %2 : !hal.buffer_view\n }\n}\n
This is similar to the --mlir-print-ir-after=
flag, but at clearly defined pipeline phases.
Compilation can be continued from any intermediate phase. This allows for iterative workflows - compile to a phase, make edits to the .mlir
file, then resume compilation and continue through the pipeline:
$ iree-compile simple_abs.mlir --compile-to=abi -o simple_abs_abi.mlir\n\n$ sed \\\n-e 's/math.absf/math.exp/' \\\n-e 's/@abs/@exp/' \\\nsimple_abs_abi.mlir > simple_exp_abi.mlir\n\n$ iree-compile simple_exp_abi.mlir \\\n--iree-hal-target-backends=llvm-cpu \\\n-o simple_exp_cpu.vmfb\n
or explicitly resume from an intermediate phase with --compile-from=<phase name>
:
$ iree-compile simple_exp_abi.mlir \\\n--iree-hal-target-backends=llvm-cpu \\\n--compile-from=abi \\\n-o simple_exp_cpu.vmfb\n
"},{"location":"developers/general/release-management/","title":"Release management","text":"IREE cuts automated releases via a workflow that is triggered daily. The only constraint placed on the commit that is released is that it has passed all CI checks. These are published on GitHub with the \"pre-release\" status. For debugging this process, see the Release debugging playbook.
We periodically promote one of these candidates to a \"stable\" release by removing the \"pre-release\" status. This makes it show up as a \"latest\" release on GitHub. We also push the Python packages for this release to PyPI.
"},{"location":"developers/general/release-management/#picking-a-candidate-to-promote","title":"Picking a candidate to promote","text":"When selecting a candidate we use the following criteria:
- \u2a864 days old so that problems with it may have been spotted
- Contains no P0 regressions vs the previous stable release
- LLVM submodule commit ideally exists upstream (no cherry picks or patches)
When you've identified a potential candidate, email the iree-discuss list with the proposal and solicit feedback. People may point out known regressions or request that some feature make the cut.
"},{"location":"developers/general/release-management/#promoting-a-candidate-to-stable","title":"Promoting a candidate to stable","text":" -
(Authorized users only) Push to PyPI using pypi_deploy.sh
- For Googlers, the password is stored at http://go/iree-pypi-password
-
Open the release on GitHub. Rename the release from \"candidate\" to \"stable\", uncheck the option for \"pre-release\", and check the option for \"latest\".
"},{"location":"developers/general/testing-guide/","title":"Testing guide","text":"Like the IREE project in general, IREE tests are divided into a few different components and use different tooling depending on the needs of that component.
Test type Test Build system Supported platforms Compiler tests iree_lit_test Bazel/CMake Host Runtime tests iree_cc_test Bazel/CMake Host/Device iree_native_test Bazel/CMake Host/Device iree_hal_cts_test_suite CMake Host/Device Core E2E tests iree_check_test Bazel/CMake Host/Device iree_trace_runner_test Bazel/CMake Host/Device iree_generated_trace_runner_test Bazel/CMake Host/Device iree_static_linker_test CMake Host/Device There are also more *_test_suite
targets that group test targets with the same configuration together.
"},{"location":"developers/general/testing-guide/#compiler-tests","title":"Compiler tests","text":"Tests for the IREE compilation pipeline are written as lit tests in the same style as MLIR.
By convention, IREE includes tests for
- printing and parsing of ops in
.../IR/test/{OP_CATEGORY}_ops.mlir
files - folding and canonicalization in
.../IR/test/{OP_CATEGORY}_folding.mlir
files - compiler passes and pipelines in other
.../test/*.mlir
files
"},{"location":"developers/general/testing-guide/#running-a-test","title":"Running a test","text":"For the test iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir
With CMake, run this from the build directory:
ctest -R iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir.test\n
With Bazel, run this from the repo root:
bazel test //compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/test:arithmetic_ops.mlir.test\n
"},{"location":"developers/general/testing-guide/#writing-a-test","title":"Writing a test","text":"For advice on writing MLIR compiler tests, see the MLIR testing guide. Tests should be .mlir
files in test
directory adjacent to the functionality they are testing. Instead of mlir-opt
, use iree-opt
, which registers IREE dialects and passes and doesn't register some unnecessary core ones.
As with most parts of the IREE compiler, these should not have a dependency on the runtime.
"},{"location":"developers/general/testing-guide/#configuring-the-build-system","title":"Configuring the build system","text":"In the Bazel BUILD file, create a iree_lit_test_suite
rule. We usually create a single suite that globs all .mlir
files in the directory and is called \"lit\".
load(\"//iree/build_tools/bazel:iree_lit_test.bzl\", \"iree_lit_test_suite\")\n\niree_lit_test_suite(\n name = \"lit\",\n srcs = glob([\"*.mlir\"]),\n tools = [\n \"@llvm-project//llvm:FileCheck\",\n \"//tools:iree-opt\",\n ],\n)\n
There is a corresponding CMake function, calls to which will be generated by our Bazel to CMake converter.
iree_lit_test_suite(\nNAME\nlit\nSRCS\n\"arithmetic_ops.mlir\"\nDATA\nFileCheck\niree-opt\n)\n
You can also create a test for a single file with iree_lit_test
.
"},{"location":"developers/general/testing-guide/#runtime-tests","title":"Runtime tests","text":"Tests for the runtime C++ code use the GoogleTest testing framework. They should generally follow the style and best practices of that framework.
"},{"location":"developers/general/testing-guide/#running-a-test_1","title":"Running a test","text":"For the test /runtime/src/iree/base/bitfield_test.cc
:
With CMake, run this from the build directory:
ctest -R iree/base/bitfield_test\n
With Bazel, run this from the repo root:
bazel test //runtime/src/iree/base:arena_test\n
"},{"location":"developers/general/testing-guide/#setting-test-environments","title":"Setting test environments","text":"Parallel testing for ctest
can be enabled via the CTEST_PARALLEL_LEVEL
environment variable. For example:
export CTEST_PARALLEL_LEVEL=$(nproc)\n
To use the Vulkan backend as test driver, you may need to select between a Vulkan implementation from SwiftShader and multiple Vulkan-capable hardware devices. This can be done via environment variables. See the generic Vulkan setup page for details regarding these variables.
For Bazel, you can persist the configuration in user.bazelrc
to save typing. For example:
test:vkswiftshader --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vkswiftshader --test_env=\"VK_LAYER_PATH=...\"\ntest:vknative --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vknative --test_env=\"VK_LAYER_PATH=...\"\n
Then you can use bazel test --config=vkswiftshader
to select SwiftShader as the Vulkan implementation. Similarly for other implementations.
"},{"location":"developers/general/testing-guide/#writing-a-test_1","title":"Writing a test","text":"For advice on writing tests in the GoogleTest framework, see the GoogleTest primer. Test files for source file foo.cc
with build target foo
should live in the same directory with source file foo_test.cc
and build target foo_test
. You should #include
iree/testing/gtest.h
instead of any of the gtest or gmock headers.
As with all parts of the IREE runtime, these should not have a dependency on the compiler.
"},{"location":"developers/general/testing-guide/#configuring-the-build-system_1","title":"Configuring the build system","text":"In the Bazel BUILD file, create a cc_test
target with your test file as the source and any necessary dependencies. Usually, you can link in a standard gtest main function. Use iree/testing:gtest_main
instead of the gtest_main
that comes with gtest.
cc_test(\n name = \"arena_test\",\n srcs = [\"arena_test.cc\"],\n deps = [\n \":arena\",\n \"//iree/testing:gtest_main\",\n ],\n)\n
We have created a corresponding CMake function iree_cc_test
that mirrors the Bazel rule's behavior. Our Bazel to CMake converter should generally derive the CMakeLists.txt
file from the BUILD file:
iree_cc_test(\nNAME\narena_test\nSRCS\n\"arena_test.cc\"\nDEPS\n::arena\niree::testing::gtest_main\n)\n
There are other more specific test targets, such as iree_hal_cts_test_suite
, which are designed to test specific runtime support with template configuration and is not supported by Bazel rules.
"},{"location":"developers/general/testing-guide/#iree-core-end-to-end-e2e-tests","title":"IREE core end-to-end (e2e) tests","text":"Here \"end-to-end\" means from the input accepted by the IREE core compiler (dialects like TOSA, StableHLO, Linalg) to execution using the IREE runtime components. It does not include tests of the integrations with ML frameworks (e.g. TensorFlow, PyTorch) or bindings to other languages (e.g. Python).
We avoid using the more traditional lit
tests used elsewhere in the compiler for runtime execution tests. Lit tests require running the compiler tools on the test platform through shell or python scripts that act on files from a local file system. On platforms like Android, the web, and embedded systems, each of these features is either not available or is severely limited.
Instead, to test these flows we use a custom framework called check
. The check framework compiles test programs on the host machine into standalone test binary files that can be pushed to test devices (such as Android phones) where they run with gtest style assertions (e.g. check.expect_almost_eq(lhs, rhs)
).
"},{"location":"developers/general/testing-guide/#building-e2e-tests","title":"Building e2e tests","text":"The files needed by these tests are not built by default with CMake. You'll need to build the special iree-test-deps
target to generate test files prior to running CTest (from the build directory):
cmake --build . --target iree-test-deps\n
To run e2e model tests in generated_e2e_model_tests.cmake, because of their dependencies, -DIREE_BUILD_E2E_TEST_ARTIFACTS=ON
needs to be set when configuring CMake. Also see IREE Benchmark Suite Prerequisites for required packages.
"},{"location":"developers/general/testing-guide/#running-a-test_2","title":"Running a Test","text":"For the test tests/e2e/xla_ops/floor.mlir
compiled for the VMVX target backend and running on the VMVX driver (here they match exactly, but in principle there's a many-to-many mapping from backends to drivers).
With CMake, run this from the build directory:
ctest -R tests/e2e/xla_ops/check_vmvx_local-task_floor.mlir\n
With Bazel, run this from the repo root:
bazel test tests/e2e/xla_ops:check_vmvx_local-task_floor.mlir\n
"},{"location":"developers/general/testing-guide/#setting-test-environments_1","title":"Setting test environments","text":"Similarly, you can use environment variables to select Vulkan implementations for running tests as explained in the Runtime tests section.
"},{"location":"developers/general/testing-guide/#writing-a-test_2","title":"Writing a test","text":"These tests live in tests/e2e
. A single test consists of a .mlir
source file specifying an IREE module where each exported function takes no inputs and returns no results and corresponds to a single test case.
As an example, here are some tests for the MHLO floor operation:
func.func @tensor() {\n %input = util.unfoldable_constant dense<[0.0, 1.1, 2.5, 4.9]> : tensor<4xf32>\n %result = \"mhlo.floor\"(%input) : (tensor<4xf32>) -> tensor<4xf32>\n check.expect_almost_eq_const(%result, dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf32>): tensor<4xf32>\n return\n}\n\nfunc.func @scalar() {\n %input = util.unfoldable_constant dense<101.3> : tensor<f32>\n %result = \"mhlo.floor\"(%input) : (tensor<f32>) -> tensor<f32>\n check.expect_almost_eq_const(%result, dense<101.0> : tensor<f32>): tensor<f32>\n return\n}\n\nfunc.func @negative() {\n %input = util.unfoldable_constant dense<-1.1> : tensor<f32>\n %result = \"mhlo.floor\"(%input) : (tensor<f32>) -> tensor<f32>\n check.expect_almost_eq_const(%result, dense<-2.0> : tensor<f32>): tensor<f32>\n return\n}\n
Test cases are created in gtest for each public function exported by the module.
Note the use of util.unfoldable_constant
to specify test constants. If we were to use a regular constant the compiler would fold away everything at compile time and our test would not actually test the runtime. unfoldable_constant
adds a barrier that prevents folding. To prevent folding/constant propagate on an arbitrary SSA-value you can use util.optimization_barrier
.
Next we use this input constant to exercise the runtime feature under test (in this case, just a single floor operation). Finally, we use a check dialect operation to make an assertion about the output. There are a few different assertion operations. Here we use the expect_almost_eq_const
op: almost because we are comparing floats and want to allow for floating-point imprecision, and const because we want to compare it to a constant value. This last part is just syntactic sugar around
%expected = arith.constant dense<101.0> : tensor<f32>\ncheck.expect_almost_eq(%result, %expected) : tensor<f32>\n
The output of running this test looks like:
[==========] Running 3 tests from 1 test suite.\n[----------] Global test environment set-up.\n[----------] 3 tests from module\n[ RUN ] module.tensor\n[ OK ] module.tensor (76 ms)\n[ RUN ] module.scalar\n[ OK ] module.scalar (79 ms)\n[ RUN ] module.negative\n[ OK ] module.negative (54 ms)\n[----------] 3 tests from module (209 ms total)\n\n[----------] Global test environment tear-down\n[==========] 3 tests from 1 test suite ran. (209 ms total)\n[ PASSED ] 3 tests.\n
The \"module\" name for the test suite comes from the default name for an implicit MLIR module. To give the test suite a more descriptive name, use an explicit named top-level module in this file.
"},{"location":"developers/general/testing-guide/#configuring-the-build-system_2","title":"Configuring the build system","text":"A single .mlir
source file can be turned into a test target with the iree_check_test
Bazel macro (and corresponding CMake function).
load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test\")\n\niree_check_test(\n name = \"check_vmvx_local-task_floor.mlir\",\n src = \"floor.mlir\",\n driver = \"local-task\",\n target_backend = \"vmvx\",\n)\n
The target naming convention is \"check_backend_driver_src\". The generated test will automatically be tagged with a \"driver=vmvx\" tag, which can help filter tests by backend (especially when many tests are generated, as below).
Usually we want to create a suite of tests across many backends and drivers. This can be accomplished with additional macros. For a single backend/driver pair:
load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_single_backend_test_suite\")\n\niree_check_single_backend_test_suite(\n name = \"check_vmvx_local-task\",\n srcs = glob([\"*.mlir\"]),\n driver = \"local-task\",\n target_backend = \"vmvx\",\n)\n
This will generate a separate test target for each file in srcs
with a name following the convention above as well as a Bazel test_suite called \"check_vmvx_local-task\" that will run all the generated tests.
You can also generate suites across multiple pairs:
load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test_suite\")\n\niree_check_test_suite(\n name = \"check\",\n srcs = [\"success.mlir\"],\n # Leave this argument off to run on all supported backend/driver pairs.\n target_backends_and_drivers = [\n (\"vmvx\", \"local-task\"),\n (\"vulkan-spirv\", \"vulkan\"),\n ],\n)\n
This will create a test per source file and backend/driver pair, a test suite per backend/driver pair, and a test suite, \"check\", that will run all the tests.
The CMake functions follow a similar pattern. The calls to them are generated in our CMakeLists.txt
file by bazel_to_cmake.
There are other test targets that generate tests based on template configuration and platform detection, such as iree_static_linker_test
. Those targets are not supported by Bazel rules at this point.
"},{"location":"developers/performance/benchmark-suites/","title":"Benchmark suites","text":"IREE Benchmarks Suites is a collection of benchmarks for IREE developers to track performance improvements/regressions during development.
The benchmark suites are run for each commit on the main branch and the results are uploaded to https://perf.iree.dev for regression analysis (for the current supported targets). On pull requests, users can add labels benchmarks:*
to trigger the benchmark runs. The results will be compared with https://perf.iree.dev and posted in the comments.
Information about the definitions of the benchmark suites can be found in the IREE Benchmark Suites Configurations.
"},{"location":"developers/performance/benchmark-suites/#running-benchmark-suites-locally","title":"Running benchmark suites locally","text":""},{"location":"developers/performance/benchmark-suites/#prerequisites","title":"Prerequisites","text":"Install iree-import-tf
and iree-import-tflite
in your Python environment (see Tensorflow Integration and TFLite Integration).
"},{"location":"developers/performance/benchmark-suites/#choose-benchmark-presets","title":"Choose benchmark presets","text":"IREE Benchmark Suites contain many benchmarks for different devices and model sizes, which can take lots of space and time to build all of them. So benchmarks are grouped into presets to allow building and running only a subset of them. The available presets are:
Execution benchmarks:
android-cpu
: benchmarks for mobile CPUs android-gpu
: benchmarks for mobile GPUs cuda
: benchmarks for CUDA with a small model set cuda-large
: benchmarks for CUDA with a large model set vulkan-nvidia
: benchmarks for Vulkan on NVIDIA graphics cards x86_64
: benchmarks for x86_64 CPUs with a small model set x86_64-large
: benchmarks for x86_64 with a large model set
Compilation benchmarks (to collect compilation statistics, such as module sizes):
comp-stats
: compilation benchmarks with a small model set comp-stats-large
: compilation benchmarks with a large model set
Note that *-large
presets will download and build a few hundred GBs of artifacts.
Set the environment variables of benchmark presets for the steps below, for example:
export EXECUTION_BENCHMARK_PRESETS=\"cuda,x86_64\"\nexport COMPILATION_BENCHMARK_PRESETS=\"comp-stats\"\n
"},{"location":"developers/performance/benchmark-suites/#build-benchmark-suites","title":"Build benchmark suites","text":"Configure IREE with -DIREE_BUILD_E2E_TEST_ARTIFACTS=ON
:
cmake -GNinja -B \"${IREE_BUILD_DIR?}\" -S \"${IREE_REPO?}\" \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DIREE_ENABLE_LLD=ON \\\n-DIREE_BUILD_E2E_TEST_ARTIFACTS=ON\n
If you only need the imported MLIR models:
cmake --build \"${IREE_BUILD_DIR?}\" --target \\\niree-benchmark-import-models\n # For large benchmarks (this will take > 100G disk space)\n# iree-benchmark-import-models-large\n
Otherwise, compile the benchmark suites and tools for benchmarking:
cmake --build \"${IREE_BUILD_DIR?}\" --target \\\niree-benchmark-suites \\\n# If any *-large preset is enabled, also build this target:\n# iree-benchmark-suites-large \\\niree-benchmark-module\nexport E2E_TEST_ARTIFACTS_DIR=\"${IREE_BUILD_DIR?}/e2e_test_artifacts\"\n
TODO(#13683): Each preset should have its own target to further reduce unnecessary builds
"},{"location":"developers/performance/benchmark-suites/#run-benchmarks","title":"Run benchmarks","text":"Export the execution benchmark config:
build_tools/benchmarks/export_benchmark_config.py execution \\\n--benchmark_presets=\"${EXECUTION_BENCHMARK_PRESETS?}\" \\\n> \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n
Run benchmarks (currently only support running on a Linux host):
build_tools/benchmarks/run_benchmarks_on_linux.py \\\n--normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n--target_device_name=\"<target_device_name, e.g. c2-standard-16>\" \\\n--output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n--verbose \\\n--cpu_uarch=\"<host CPU uarch, e.g. CascadeLake>\"\n# Traces can be collected by adding:\n# --traced_benchmark_tool_dir=\"${IREE_TRACED_BUILD_DIR?}/tools\" \\\n# --trace_capture_tool=/path/to/iree-tracy-capture \\\n# --capture_tarball=captured_tracy_files.tar.gz\n
Note that:
<target_device_name>
selects a benchmark group that targets a specific device: - Common options:
c2-standard-16
for x86_64 CPU benchmarks. a2-highgpu-1g
for NVIDIA GPU benchmarks.
- All device names are defined under build_tools/python/e2e_test_framework/device_specs.
- To run x86_64 benchmarks, right now
--cpu_uarch
needs to be provided and only CascadeLake
is available currently. - To build traced benchmark tools, see Profiling with Tracy.
Filters can be used to select the benchmarks:
build_tools/benchmarks/run_benchmarks_on_linux.py \\\n--normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n--target_device_name=\"c2-standard-16\" \\\n--output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n--verbose \\\n--cpu_uarch=\"CascadeLake\" \\\n--model_name_regex=\"MobileBert*\" \\\n--driver_filter_regex='local-task' \\\n--mode_regex=\"4-thread\"\n
"},{"location":"developers/performance/benchmark-suites/#generate-compilation-statistics-compilation-benchmarks","title":"Generate compilation statistics (compilation benchmarks)","text":"Export the compilation benchmark config:
build_tools/benchmarks/export_benchmark_config.py compilation \\\n--benchmark_presets=\"${COMPILATION_BENCHMARK_PRESETS?}\" \\\n> \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n
Generate the compilation statistics:
build_tools/benchmarks/collect_compilation_statistics.py \\\n--compilation_benchmark_config=comp_config.json \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--build_log=\"${IREE_BUILD_DIR?}/.ninja_log\" \\\n--output=\"${E2E_TEST_ARTIFACTS_DIR?}/compile_stats_results.json\"\n
Note that you need to use Ninja to build the benchmark suites as the tool collects information from its build log.
"},{"location":"developers/performance/benchmark-suites/#show-execution-compilation-benchmark-results","title":"Show execution / compilation benchmark results","text":"If you want to generate a comparison report locally, you can use diff_local_benchmarks.py script to compare two result json files and generate the report. For example:
build_tools/benchmarks/diff_local_benchmarks.py \\\n--base \"${E2E_TEST_ARTIFACTS_DIR?}/before_benchmark_results.json\" \\\n--target \"${E2E_TEST_ARTIFACTS_DIR?}/after_benchmark_results.json\" \\\n> report.md\n
An example that compares compilation statistics:
build_tools/benchmarks/diff_local_benchmarks.py \\\n--base-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/before_compile_stats_results.json\" \\\n--target-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/after_compile_stats_results.json\" \\\n> report.md\n
"},{"location":"developers/performance/benchmark-suites/#find-compile-and-run-commands-to-reproduce-benchmarks","title":"Find compile and run commands to reproduce benchmarks","text":"Each benchmark has its benchmark ID in the benchmark suites, you will see a benchmark ID at:
- In the series URL of https://perf.iree.dev
- Execution benchmark:
https://perf.iree.dev/serie?IREE?<benchmark_id>
- Compilation benchmark:
https://perf.iree.dev/serie?IREE?<benchmark_id>-<metric_id>
- In
benchmark_results.json
and compile_stats_results.json
- Execution benchmark result has a field
run_config_id
- Compilation benchmark result has a field
gen_config_id
- In PR benchmark summary or the markdown generated by
diff_local_benchmarks.py
, each benchmark has the link to its https://perf.iree.dev URL, which includes the benchmark ID.
If you don't have artifacts locally, see Fetching Benchmark Artifacts from CI to find the GCS directory of the CI artifacts. Then fetch the needed files:
# Get ${E2E_TEST_ARTIFACTS_DIR_URL} from \"Fetching Benchmark Artifacts from CI\".\nexport E2E_TEST_ARTIFACTS_DIR=\"e2e_test_artifacts\"\n\n# Download all artifacts\nmkdir \"${E2E_TEST_ARTIFACTS_DIR?}\"\ngcloud storage cp -r \"${E2E_TEST_ARTIFACTS_DIR_URL?}\" \"${E2E_TEST_ARTIFACTS_DIR?}\"\n
Run the helper tool to dump benchmark commands from benchmark configs:
build_tools/benchmarks/benchmark_helper.py dump-cmds \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/execution-benchmark-config.json\" \\\n--compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/compilation-benchmark-config.json\" \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--benchmark_id=\"<benchmark_id>\"\n
"},{"location":"developers/performance/benchmark-suites/#get-full-list-of-benchmarks","title":"Get full list of benchmarks","text":"The commands below output the full list of execution and compilation benchmarks, including the benchmark names and their flags:
build_tools/benchmarks/export_benchmark_config.py execution > \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\nbuild_tools/benchmarks/export_benchmark_config.py compilation > \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\nbuild_tools/benchmarks/benchmark_helper.py dump-cmds \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n--compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n
"},{"location":"developers/performance/benchmark-suites/#fetching-benchmark-artifacts-from-ci","title":"Fetching benchmark Artifacts from CI","text":""},{"location":"developers/performance/benchmark-suites/#1-find-the-corresponding-ci-workflow-run","title":"1. Find the corresponding CI workflow run","text":"On the commit of the benchmark run, you can find the list of the workflow jobs by clicking the green check mark. Click any job starts with CI /
:
"},{"location":"developers/performance/benchmark-suites/#2-get-urls-of-gcs-artifacts","title":"2. Get URLs of GCS artifacts","text":"On the CI page, click Summary
on the top-left to open the summary page. Scroll down and the links to artifacts are listed in a section titled \"Artifact Links\". Paste the content in your shell to define all needed variables for the following steps:
"},{"location":"developers/performance/benchmark-suites/#3-fetch-the-benchmark-artifacts","title":"3. Fetch the benchmark artifacts","text":"To fetch files from the GCS URL, the gcloud CLI tool (https://cloud.google.com/sdk/docs/install) can list the directory contents and download files (see https://cloud.google.com/sdk/gcloud/reference/storage for more usages). If you want to use CI artifacts to reproduce benchmarks locally, see Find Compile and Run Commands to Reproduce Benchmarks.
Assume you get the GCS URL variables from Get URLs of GCS artifacts.
Download artifacts:
# The GCS directory has the same structure as your local ${IREE_BUILD_DIR?}/e2e_test_artifacts.\ngcloud storage ls \"${E2E_TEST_ARTIFACTS_DIR_URL?}\"\n\n# Download all source and imported MLIR files:\ngcloud storage cp \"${E2E_TEST_ARTIFACTS_DIR_URL?}/*.mlir\" \"<target_dir>\"\n
Execution and compilation benchmark configs can be downloaded at:
# Execution benchmark config:\ngcloud storage cp \\\n\"${E2E_TEST_ARTIFACTS_DIR_URL?}/execution-benchmark-config.json\" \\\n\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n\n# Compilation benchmark config:\ngcloud storage cp \\\n\"${E2E_TEST_ARTIFACTS_DIR_URL?}/compilation-benchmark-config.json\" \\\n\"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n
Benchmark raw results and traces can be downloaded at:
# Execution benchmark raw results\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-results-*.json\" .\n\n# Optional: Merge raw results into a single file\nbuild_tools/benchmarks/benchmark_helper.py merge-results benchmark-results-*.json > benchmark_results.json\n\n# Execution benchmark traces\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-traces-*.tar.gz\" .\n\n# Compilation benchmark results\ngcloud storage cp \"${COMPILATION_BENCHMARK_RESULTS_URL?}\" .\n
"},{"location":"developers/performance/benchmarking/","title":"Benchmarking","text":"IREE uses benchmarks to inspect performance at varying levels of granularity. Benchmarking is implemented using the Google Benchmark library. To understand performance details and guide optimization, please refer to the IREE profiling documentation.
"},{"location":"developers/performance/benchmarking/#module-benchmarks","title":"Module Benchmarks","text":"iree-benchmark-module
is a program accepting (almost) the same inputs as iree-run-module
that will benchmark the invocation of a single entry function. It measures timing for the whole process of invoking a function through the VM, including allocating and freeing output buffers. This is a high-level benchmark of an entire invocation flow. It provides a big picture view, but depends on many different variables, like an integration test. For finer-grained measurements more akin to unit tests, see Executable Benchmarks.
To use iree-benchmark-module
, generate an IREE module for the target backend:
$ bazel run //tools:iree-compile -- \\\n--iree-hal-target-backends=vmvx \\\n$PWD/samples/models/simple_abs.mlir \\\n-o /tmp/module.fb\n
and then benchmark an exported function in that module:
$ bazel run //tools:iree-benchmark-module -- \\\n--module=/tmp/module.fb \\\n--device=local-task \\\n--function=abs \\\n--input=f32=-2\n
You'll see output like
Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n L1 Data 32K (x6)\nL1 Instruction 32K (x6)\nL2 Unified 1024K (x6)\nL3 Unified 8448K (x1)\nLoad Average: 2.21, 1.93, 3.34\n***WARNING*** CPU scaling is enabled, the benchmark real time measurements may\n be noisy and will incur extra overhead.\n***WARNING*** Library was built as DEBUG. Timings may be affected.\n------------------------------------------------------------------------------\nBenchmark Time CPU Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time 0.22 ms 0.23 ms 3356\n
Notice that there are a few warnings in there (you may not see all of these). The benchmark library helpfully warns about some common issues that will affect benchmark timing. When trying to obtain real benchmark numbers, you should generally build an optimized build (-c opt
in Bazel) and disable CPU scaling.
bazel build -c opt //tools:iree-benchmark-module\n
Another thing to consider is that depending on where you are running the benchmark you might want to avoid additional programs running at the same time. Bazel itself runs a server even when it's not being actively invoked that can be quite a memory hog, so we'll instead invoke the binary directly. Use your favorite process manager (e.g. htop or pkill on Linux) to kill heavy-weight programs such as Chrome and Bazel.
Now we'll actually invoke the binary:
$ ./bazel-bin/tools/iree-benchmark-module \\\n--module=/tmp/module.fb \\\n--device=local-task \\\n--function=abs \\\n--input=f32=-2\n
Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n L1 Data 32K (x6)\nL1 Instruction 32K (x6)\nL2 Unified 1024K (x6)\nL3 Unified 8448K (x1)\nLoad Average: 1.49, 3.42, 3.49\n------------------------------------------------------------------------------\nBenchmark Time CPU Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time 0.011 ms 0.014 ms 61654\n
Remember to restore CPU scaling when you're done.
"},{"location":"developers/performance/benchmarking/#executable-benchmarks","title":"Executable Benchmarks","text":"We also benchmark the performance of individual parts of the IREE system in isolation. IREE breaks a model down to dispatch functions. To benchmark all the dispatch functions, generate an IREE module with the -iree-flow-export-benchmark-funcs
flag set:
$ build/tools/iree-compile \\\n--iree-input-type=stablehlo \\\n--iree-flow-export-benchmark-funcs \\\n--iree-hal-target-backends=vmvx \\\ntests/e2e/stablehlo_models/fullyconnected.mlir \\\n-o /tmp/fullyconnected.vmfb\n
and then benchmark all exported dispatch functions (and all exported functions) in that module:
$ build/tools/iree-benchmark-module\n --module=/tmp/fullyconnected.vmfb\n --device=local-task\n
If no entry_function
is specified, iree-benchmark-module
will register a benchmark for each exported function that takes no inputs.
You will see output like:
Run on (72 X 3700 MHz CPU s)\nCPU Caches:\n L1 Data 32 KiB (x36)\nL1 Instruction 32 KiB (x36)\nL2 Unified 1024 KiB (x36)\nL3 Unified 25344 KiB (x2)\nLoad Average: 4.39, 5.72, 6.76\n---------------------------------------------------------------------------------------------\nBenchmark Time CPU Iterations\n---------------------------------------------------------------------------------------------\nBM_main_ex_dispatch_0_benchmark/process_time/real_time 0.030 ms 0.037 ms 34065\nBM_main_ex_dispatch_1_benchmark/process_time/real_time 0.034 ms 0.042 ms 20567\nBM_main_ex_dispatch_2_benchmark/process_time/real_time 0.043 ms 0.051 ms 18576\nBM_main_ex_dispatch_3_benchmark/process_time/real_time 0.029 ms 0.036 ms 21345\nBM_main_ex_dispatch_4_benchmark/process_time/real_time 0.042 ms 0.051 ms 15880\nBM_main_ex_dispatch_5_benchmark/process_time/real_time 0.030 ms 0.037 ms 17854\nBM_main_ex_dispatch_6_benchmark/process_time/real_time 0.043 ms 0.052 ms 14919\nBM_main_benchmark/process_time/real_time 0.099 ms 0.107 ms 5892\n
"},{"location":"developers/performance/benchmarking/#bytecode-module-benchmarks","title":"Bytecode Module Benchmarks","text":"Normally, the IREE VM is expected to be integrated into applications and driving model execution. So its performance is of crucial importance. We strive to introduce as little overhead as possible and have several benchmark binaries dedicated for evaluating the VM's performance. These benchmark binaries are named as *_benchmark
in the iree/vm/
directory. They also use the Google Benchmark library as the above.
"},{"location":"developers/performance/benchmarking/#cpu-configuration","title":"CPU Configuration","text":"When benchmarking, it's important to consider the configuration of your CPUs. Most notably, CPU scaling can give variable results, so you'll usually want to disable it. This can get pretty complex, but the most basic thing to do is to run all CPUs at maximum frequency. The other thing to consider is what CPU(s) your program is running on. Both of these get more complicated on mobile and in multithreaded workloads.
"},{"location":"developers/performance/benchmarking/#linux","title":"Linux","text":"Google benchmark provides some instructions. Note that the library will print \"CPU scaling is enabled\" warnings for any configuration that doesn't have the quota governor set to performance. Similarly the CPU frequency it reports is the maximum frequency of cpu0, not the frequency of the processor it's actually running on. This means that more advanced configurations should ignore these messages.
Turn off CPU scaling before benchmarking.
sudo cpupower frequency-set --governor performance\n
Restore CPU scaling after benchmarking:
sudo cpupower frequency-set --governor powersave\n
To learn more about different scaling governor settings, see https://www.kernel.org/doc/Documentation/cpu-freq/governors.txt. To restrict which CPUs you run on, use the taskset
command which takes a hexadecimal mask.
To only run on the lowest-numbered CPU you can run
taskset 1 sleep 20 &\n
You can confirm that the process is running on the given CPU:
ps -o psr $!\n
Note that $!
indicates the process ID of the last executed background command, so you can only use this shorthand if you didn't run any commands after the sleep. For more info on taskset, see https://linux.die.net/man/1/taskset.
"},{"location":"developers/performance/benchmarking/#android","title":"Android","text":"Read and understand the Linux instructions first.
Android doesn't give us quite as nice tooling, but the principle is basically the same. One important difference is that thermal throttling is a much bigger concern on mobile. Without a cooling plate, it is likely that high clock speeds will overheat the device and engage thermal throttling, which will ignore whatever clock speeds you may have set to prevent things from catching on fire. Therefore the naive approach above is likely not a good idea.
You will likely need to be root (use su
or adb root
). The commands will depend on your exact phone and number of cores. First play around and make sure you understand what everything means. Note that each CPU has its own files which are used to control its behavior, but changes to a single CPU will sometimes affect others (see /sys/devices/system/cpu/cpu0/cpufreq/affected_cpus
).
Some useful files:
/proc/cpuinfo\n/sys/devices/system/cpu/possible\n/sys/devices/system/cpu/present\n/sys/devices/system/cpu/cpu0/online\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq\n/sys/devices/system/cpu/cpu0/cpufreq/affected_cpus\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed\n
See the clockspeed of each CPU
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\npaste \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_cur_freq\" \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_min_freq\" \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_max_freq\"; \\\ndone\n
Before changing things, make sure to check the current scaling governor settings first so you can put them back when you're done.
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\ncat \"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n
"},{"location":"developers/performance/benchmarking/#single-core-example","title":"Single-Core Example","text":"Here's an example to run IREE in a single-threaded context on CPU 7 at its lowest clock speed.
First we'll take control of the clockspeed by setting the governor to \"userspace\".
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\necho userspace > \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n
We can now set individual clock speeds. We'll pin cpu7 to its minimum frequency. We choose the minimum instead of the maximum here to mitigate thermal throttling concerns
$ cat /sys/devices/system/cpu/cpu7/cpufreq/cpuinfo_min_freq > \\\n/sys/devices/system/cpu/cpu7/cpufreq/scaling_setspeed\n
We can confirm the frequencies of all the CPUs by running the same command above. Now to run a command specifically on cpu7, use taskset 80
(hex for 10000000):
taskset 80 sleep 20 &\nps -o psr $!\n
Remember to clean up when you're done! Here we'll set the scaling governor back to schedutil because that's what they were before on the particular device this was tested on, but that may not exist on all devices.
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\necho schedutil > \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n
"},{"location":"developers/performance/benchmarking/#android-scripts","title":"Android Scripts","text":"We provide a few scripts to set clockspeeds on Android (under build_tools/benchmarks
). These are somewhat device-specific:
- The
set_android_scaling_governor.sh
works on all CPUs, but the default governor name may be different across devices. - The
set_*_gpu_scaling_policy.sh
script used should match the actual GPU on your device.
Sample configuration steps for Pixel 6:
- Copy all scripts to the device:
adb push build_tools/benchmarks/*.sh /data/local/tmp\n
- Launch interactive adb shell as super user:
adb shell\noriole:/ # su\noriole:/ # cd /data/local/tmp\n
- Pin frequencies (high clockspeeds):
oriole:/ # ./set_android_scaling_governor.sh\nCPU info (before changing governor):\n cpu governor cur min max\n ------------------------------------------------\n cpu0 sched_pixel 1098000 300000 1803000\ncpu1 sched_pixel 1598000 300000 1803000\ncpu2 sched_pixel 1598000 300000 1803000\ncpu3 sched_pixel 1098000 300000 1803000\ncpu4 sched_pixel 400000 400000 2253000\ncpu5 sched_pixel 400000 400000 2253000\ncpu6 sched_pixel 500000 500000 2802000\ncpu7 sched_pixel 500000 500000 2802000\nSetting CPU frequency governor to performance\n CPU info (after changing governor):\n cpu governor cur min max\n ------------------------------------------------\n cpu0 performance 1803000 300000 1803000\ncpu1 performance 1803000 300000 1803000\ncpu2 performance 1803000 300000 1803000\ncpu3 performance 1803000 300000 1803000\ncpu4 performance 2253000 400000 2253000\ncpu5 performance 2253000 400000 2253000\ncpu6 performance 2802000 500000 2802000\ncpu7 performance 2802000 500000 2802000\noriole:/data/local/tmp # ./set_pixel6_gpu_scaling_policy.sh\nGPU info (before changing frequency scaling policy):\n policy cur min max\n --------------------------------------------------------------\n coarse_demand [adaptive] always_on 251000 151000 848000\nSetting GPU frequency scaling policy to performance\n GPU info (after changing frequency scaling policy):\n policy cur min max\n --------------------------------------------------------------\n coarse_demand adaptive [always_on] 848000 151000 848000\n
- Restore default frequencies:
oriole:/ # ./set_android_scaling_governor.sh sched_pixel\n...\noriole:/ # ./set_pixel6_gpu_scaling_policy.sh default\n...\n
TODO(scotttodd): Windows instructions
"},{"location":"developers/performance/profiling-cpu-events/","title":"Profiling CPUs","text":"CPUs are able to record certain events that may be relevant when investigating the performance of a program. A common example of such an event is a \"cache miss\", when the program tries to access data in memory that isn't already in some CPU cache, causing that access to be slower than it could otherwise be.
Querying and analyzing this data can be useful, but is hard in two distinct ways:
- Depending on the CPU and on the OS, both hardware and software limitations can get in the way of obtaining accurate data.
- This data tends to be inherently difficult to interpret, even when it is perfectly accurate. In practice it is often noisy and inaccurate, which makes interpretation even more complicated.
There are two parts to this page: platform-specific information about how to query this data, and, at the end, a platform-independent explanation of how to interpret it.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#perf-and-simpleperf-on-linux-and-android","title":"Perf and Simpleperf, on Linux and Android","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#overview","title":"Overview","text":"The Linux kernel exposes system event counters to user-space programs by means of the perf_event_open
system call. This includes both hardware event counters (such as CPU cache events) and software events from the kernel (such as page faults and context switches). Anyone may use this system call to implement a profiler, but Linux readily offers one, perf
.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#preserving-artifacts","title":"Preserving artifacts","text":"By default IREE cleans up any temporary files it creates while running. Tools like perf, however, require those files exist even after the process has exited. The environment variable IREE_PRESERVE_DYLIB_TEMP_FILES
can be set to preserve the files. This is only needed for the CPU path when using the system loader.
export IREE_PRESERVE_DYLIB_TEMP_FILES=1\n
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#desktop-linux","title":"Desktop linux","text":"On desktop Linux we can use perf
. It is provided on most Linux distributions, for instance on Debian-based distributions do:
sudo apt install linux-perf\n
Run the program to be profiled, prepending its command line with perf record
. By default this will write the profile data to the current directory, ./perf.data
. Sometimes this isn't ideal, such as when the current directory is under version control. Explicit paths can be specified by the -o
flag to direct the output of perf record
, and then by -i
flags to select the input of subsequent commands analyzing the profile. Example:
perf record -o /tmp/perf.data \\\n./tools/iree-benchmark-module \\\n--device=local-task \\\n... command-line arguments of iree-benchmark-module as usual ...\n
By default, this samples time spent. One may specify instead an event to sample by, with the -e
flag. For instance, to sample by L1 cache misses, one may do:
perf record -o /tmp/perf.data -e L1-dcache-load-misses \\\n./tools/iree-benchmark-module \\\n--device=local-task \\\n... command-line arguments of iree-benchmark-module as usual ...\n
perf list
dumps the list of event types.
Once you have recorded a profile, there are two main ways to analyze it: perf report
and perf annotate
.
perf report
breaks down the event counts by symbol. In the default case where what was sampled was time, this is just an ordinary profile by symbol name, no different than what could be viewed in other profilers such as Tracy. Where it gets really interesting is when the profile was recording a specific event type, as in the above -e L1-dcache-load-misses
example:
perf report -i /tmp/perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', Event count (approx.): 362571861\nOverhead Command Shared Object Symbol\n 61.53% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_31\n 13.30% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_11\n 2.11% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_13\n 1.90% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_19\n 1.54% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_25\n 1.49% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_5\n
perf annotate
breaks down the event counts by instruction. Again, in the default case where what was sampled was time, this is no different than what could be viewed in Tracy, and the real motivation to use perf
is when profiling by specific event types as in the above -e L1-dcache-load-misses
example:
perf annotate -i perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', 4000 Hz, Event count (approx.): 362571861\nserving_default_ex_dispatch_31 /tmp/dylib_executablenzpx2Q.so [Percent: local period]\n1.66 \u2502 movups -0x1000(%rdi),%xmm10\n 0.48 \u2502 movups -0x800(%rdi),%xmm9\n 0.82 \u2502 movups (%rdi),%xmm8\n 0.49 \u2502 movaps %xmm1,%xmm4\n 0.12 \u2502 shufps $0x0,%xmm1,%xmm4\n 0.14 \u2502 mulps %xmm5,%xmm4\n 0.28 \u2502 addps %xmm6,%xmm4\n 0.60 \u2502 movaps %xmm3,%xmm6\n 0.34 \u2502 shufps $0x0,%xmm3,%xmm6\n
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#warning","title":"Warning","text":"perf annotate
is even noisier than perf report
as it can be overly optimistic, depending on the CPU, to pin an event to a specific instruction. Typically, this works fairly well on x86 CPUs and less well on ARM CPUs and more generally on anything mobile. Even on a desktop x86 CPU, this is noisy, as the above example (recorded on a Skylake workstation) shows: it blamed a mulps %xmm5,%xmm4
instruction for a cache miss, which doesn't make sense as that instruction only touches registers.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#android","title":"Android","text":"On Android we can use simpleperf
. It's preinstalled on current Android userdebug
images, and part of the Android NDK.
In theory, as Android is Linux, it should be possible to use perf
. Unfortunately, perf
is difficult to build for Android. Fortunately, simpleperf
is readily available: it is preinstalled in Android userdebug
images, and it is part of the Android NDK.
First, we record on the device:
adb shell \\\nsimpleperf record -e raw-l1d-cache-refill -o /data/local/tmp/perf.data \\\n/data/local/tmp/iree-benchmark-module \\\n--device=local-task \\\n... command-line arguments of iree-benchmark-module as usual ...\n
Then pull the recorded data from the device, and analyze on the desktop. We assume that ${ANDROID_NDK}
points to the local copy of the Android NDK.
adb pull /data/local/tmp/perf.data /tmp/perf.data\n${ANDROID_NDK}/simpleperf/report.py -i /tmp/perf.data\n
This prints a breakdown of raw-l1d-cache-refill
events by symbol.
Like with perf
, a list of event types can be queried by the list
subcommand:
adb shell simpleperf list\n
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#no-support-for-annotate-by-cpu-event","title":"No support for annotate
by CPU event","text":"There is no simpleperf annotate
. The simpleperf
documentation lists a couple of ways of achieving the same thing.
However:
- The common case of annotating by time, as opposed to annotating by CPU event, is supported by Tracy.
- Annotating by CPU event inherently does not work due to hardware limitations of the ARM CPUs found in Android devices. That is, the hardware is too imprecise at pinning an event to a particular instruction.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#interpreting-cpu-event-counts","title":"Interpreting CPU event counts","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#problems","title":"Problems","text":"There are multiple layers of complexity in interpreting CPU event counts.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-in-themselves-normal","title":"These events are in themselves normal","text":"The first difficulty is in the fact that most of these events are normal. So just knowing that they happened is not in itself actionable.
For example, if we learn that some code causes cache misses, that isn't big news: so does all code. Maybe this code has too many cache misses, but how many is too many? Maybe this code alone accounts for a large fraction of the overall total of the whole program, but maybe even that is normal, for instance if the code being studied is the 'hot' part of the program where a large fraction of overall time is spent?
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-hardware-dependent-and-under-documented","title":"These events are hardware-dependent and under-documented","text":"Many of these events have a meaning that varies between CPUs and that is difficult to characterize on any CPU, let alone in a way that applies to all CPUs.
For example, take the \"L2 data cache refill\". On ARM, with simpleperf
, that would be raw-l2d-cache-refill
. Questions:
- Is \u201cL2\u201d inclusive of \u201cL1\u201d?
- How many bytes are transferred per \u201crefill\u201d?
- Are accesses induced by speculative execution or by automatic pre-fetching counted in the same way as accesses induced by actual code execution?
The answers to all of the above questions are CPU-dependent. They may even vary between the CPU cores of the same Android device.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-imprecise-and-noisy-particularly-on-arm-cpus","title":"These events are imprecise and noisy, particularly on ARM CPUs","text":"Expect noise levels above 10% in many CPU event counts on ARM CPUs. Moreover, on ARM, as discussed above, there is inaccuracy in which instruction is blamed for which event, which will increase inaccuracy of per-symbol breakdowns for very cheap symbols (and makes perf annotate
impossible as noted above). Finally, be aware that some ARM CPUs may perform event count interpolation, so we may not have any access to true hardware counts.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#recommendations","title":"Recommendations","text":"Here is a workflow pattern that allows one to make significant use of CPU event counts, despite all the problems noted above:
- Hypothesize that some code diff might help performance, and might help reducing the number of CPU events of a certain type, and that the two might be related.
- Benchmark with and without the code diff, on the same device, everything else being equal.
- Let your benchmark perform a fixed number of iterations, or, if using a benchmark termination condition of the form \"run until at least N seconds have elapsed\", carefully divide event counts by the actual number of iterations that were run.
- If the observed CPU event count difference is significant, go ahead and claim that your code diff probably helps with that aspect of CPU behavior.
Some things NOT to be done:
- Don\u2019t try to compare different metrics, not even when it seems obvious that they should satisfy a simple relationship, not even on the same CPU (e.g. \u201cL1 accesses should be greater than L2 accesses\u201d).
- Don\u2019t divide by some \u201ctotal\u201d metric to get some kinds of ratios. For example, don\u2019t try to compute a \u201ccache miss ratio\u201d as quotient of \u201ccache refill\u201d over \u201call cache accesses\u201d metrics. The first problem with that (even before we get to CPU-specific issues) is that that\u2019s rewarding increases to the \u201call cache accesses\u201d metrics, so if something bad happens in your codegen and your kernel ends up spilling a lot of registers to the stack, that\u2019s going to be a lot more accesses which will all be L1 hits so that\u2019ll help this ratio look better! So more generally, just try to minimize some CPU metrics (that count \u201ccostly\u201d events), not some more complex math expression formed from arithmetic on CPU metrics.
","tags":["CPU"]},{"location":"developers/performance/profiling-gpu-vulkan/","title":"Profiling GPUs using Vulkan","text":"Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used.
(TODO: add some pictures for each tool)
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#renderdoc","title":"RenderDoc","text":"Support for RenderDoc can be enabled by configuring cmake with -DIREE_ENABLE_RENDERDOC_PROFILING=ON
. When built in to IREE the profiling functionality is available for programmatic use via the iree_hal_device_profiling_begin
and iree_hal_device_profiling_end
APIs.
When using one of the standard IREE tools (iree-run-module
, iree-benchmark-module
, etc) the --device_profiling_mode=queue
flag can be passed to enable capture around the entire invocation (be careful when benchmarking as the recordings can be quite large!). The default capture file name can be specified with --device_profiling_file=foo.rdc
.
Capturing in the RenderDoc UI can be done by specifying the IREE tool or embedding application (iree-run-module
, etc) as the launch executable and adding all arguments as normal.
Capturing from the command line can be done using renderdoccmd
with the specified file appearing (by default) in the executable directory:
renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=foo.rdc ...\nstat tools/foo.rdc\nrenderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=/some/path/foo.rdc ...\nstat /some/path/foo.rdc\n
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#android-gpus","title":"Android GPUs","text":"There are multiple GPU vendors for the Android platforms, each offering their own tools. Android GPU Inspector (AGI) provides a cross-vendor solution. See the documentation for more details.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#build-android-app-to-run-iree","title":"Build Android app to run IREE","text":"In order to perform capture and analysis with AGI, you will need a full Android app. In IREE we have a simple Android native app wrapper to help package IREE core libraries together with a specific VM bytecode invocation into an Android app. The wrapper and its documentation are placed at tools/android/run_module_app/
.
For example, to package a module compiled from the following stablehlo-dot.mlir
as an Android app:
func @dot(%lhs: tensor<2x4xf32>, %rhs: tensor<4x2xf32>) -> tensor<2x2xf32> {\n %0 = \"stablehlo.dot\"(%lhs, %rhs) : (tensor<2x4xf32>, tensor<4x2xf32>) -> tensor<2x2xf32>\n return %0 : tensor<2x2xf32>\n}\n
# First compile into a VM bytecode module\n$ /path/to/iree/build/tools/iree-compile -- \\\n--iree-input-type=stablehlo \\\n--iree-hal-target-backends=vulkan-spirv \\\n/path/to/stablehlo-dot.mlir \\\n-o /tmp/stablehlo-dot.vmfb\n\n# Then package the Android app\n$ /path/to/iree/source/tools/android/run_module_app/build_apk.sh \\\n./build-apk \\\n--device vulkan \\\n--module /tmp/stablehlo-dot.vmfb \\\n--function dot \\\n--input=...\n
Where /path/to/input/file
is a file containing inputs to dot
, for example:
2x4xf32=[[1.0 2.0 3.0 4.0][5.0 6.0 7.0 8.0]]\n4x2xf32=[[9.0 10.0][11.0 12.0][13.0 14.0][15.0 16.0]]\n
The above will build an iree-run-module.apk
under the ./build-apk/
directory, which you can then install via adb install
.
build_apk.sh
needs the Android SDK and NDK internally, an easy way to manage them is by installing Android Studio. After installation, you will need to set up a few environment variables, which are printed at the beginning of build_apk.sh
invocation.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#capture-and-analyze-with-agi","title":"Capture and analyze with AGI","text":"You can follow AGI's Getting Started page to learn how to use it. In general the steps are:
- Install the latest AGI from https://github.com/google/agi/releases and launch.
- Fill in the \"Application\" field by searching the app. The line should read like
android.intent.action.MAIN:dev.iree.run_module/android.app.NativeActivity
. - Select start at beginning and choose a proper duration.
- Configure system profile to include all GPU counters.
- Start capture.
Generated traces are in the perfetto format. They can be viewed directly within AGI and also online in a browser at https://ui.perfetto.dev/, without needing an Android device.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#desktop-gpus","title":"Desktop GPUs","text":"Vulkan supports both graphics and compute, but most tools in the Vulkan ecosystem focus on graphics. As a result, some Vulkan profiling tools expect commands to correspond to a sequence of frames presented to displays via framebuffers. This means additional steps for IREE and other Vulkan applications that solely rely on headless compute. For graphics-focused tools, we need to wrap IREE's logic inside a dummy rendering loop in order to provide the necessary markers for these tools to perform capture and analysis.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#amd","title":"AMD","text":"For AMD GPUs, Radeon GPU Profiler (RGP) is the tool to understand fine details of how IREE GPU performs. See the documentation for details.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#nvidia","title":"NVIDIA","text":"For NVIDIA GPUs, NVIDIA Nsight Graphics is the tool to understand fine details of how IREE GPU performs. See the documentation for details.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-with-tracy/","title":"Profiling with Tracy","text":"Tracy is a profiler that puts together in a single view both instrumentation and system profiling (sampling, systrace). It's key to understand the nuance here.
- Instrumentation is code built into the process being profiled, collecting timestamps at the start and end of \"zones\". Once it's enabled at build time, it typically just works \u2014 it is a part of our application logic just like anything else, so there's no reason why it would not work.
- Sampling and SysTrace rely on specific system features to collect information on what is actually running. These rely on OS and binary (ELF) file features, so they can take a bit more care to get to work properly.
There are two components to Tracy. They communicate over a TCP socket.
- The \"client\" is the program being profiled.
- The \"server\" is:
- Either the Tracy profiler UI (which we build as
iree-tracy-profiler
), - Or the Tracy command-line capture tool (
iree-tracy-capture
) that can save a trace for later loading in the Tracy profiler UI.
"},{"location":"developers/performance/profiling-with-tracy/#the-tracy-manual","title":"The Tracy manual","text":"The primary source of Tracy documentation, including for build instructions, is a PDF manual that's part of each numbered release. Download or view in browser.
"},{"location":"developers/performance/profiling-with-tracy/#overview","title":"Overview","text":"We will go through each step below, but here is an overview. It highlights the simpler subset of instructions when only instrumentation is needed, vs. the additional steps needed when Sampling is also wanted.
Component Instrumentation only Instrumentation and Sampling Build Tracy capture (iree-tracy-capture
) Base instructions below for dependencies and build Same Build Tracy profiler (iree-tracy-profiler
) Base instructions below for dependencies and build Same plus capstone-next
instructions for CPU disassembly to work Build the IREE compiler (iree-compile
) for profiling your own modules Nothing particular Same Build the IREE compiler (iree-compile
) for profiling the compiler itself Also need CMake setting: IREE_ENABLE_COMPILER_TRACING
Same Compile your IREE module (run iree-compile
) Nothing particular Also need to pass --iree-llvmcpu-link-embedded=false
(and also, for llvm-cpu
backend, pass --iree-llvmcpu-debug-symbols=true
, but that is currently default). Build IREE device binaries (iree-run-module
etc) Base instructions below (CMake: set IREE_ENABLE_RUNTIME_TRACING
) Also need debug information (Set CMAKE_BUILD_TYPE
to RelWithDebInfo
). Run IREE device binaries loading your modules Nothing particular (May need to set the environment variable TRACY_NO_EXIT=1
for short-running benchmarks) Also need to set the environment variable IREE_PRESERVE_DYLIB_TEMP_FILES
and adjust device security settings or run as root depending on OS. Run Tracy capture (iree-tracy-capture
) to collect the trace If device!=host (e.g. Android), set up TCP port forwarding. Same Build IREE's own tests and benchmark suites with Tracy instrumentation As above, CMake: set IREE_ENABLE_RUNTIME_TRACING
. Also need the CMake setting IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER
so that --iree-llvmcpu-link-embedded=false
will be passed to iree-compile
."},{"location":"developers/performance/profiling-with-tracy/#install-dependencies","title":"Install dependencies","text":""},{"location":"developers/performance/profiling-with-tracy/#do-you-need-capstone-next","title":"Do you need capstone-next?","text":"You can skip this section if you don't need disassembly of CPU code.
Capstone is the disassembly framework used by Tracy. The default branch, which is what OS packages still distribute, is running a few years behind current CPU architectures.
Newer CPU architectures such as RISC-V, or newer extensions of existing architectures (e.g. new SIMD instructions in the ARM architecture) are typically only supported in the next
branch. If you need that support, check out and build that branch. Consider uninstalling any OS package for capstone
or otherwise ensure that your IREE build will pick up your next
branch build.
"},{"location":"developers/performance/profiling-with-tracy/#linux","title":"Linux","text":"If you haven't opted to build capstone-next
(see above section), install the OS package for capstone
now (Debian-based distributions):
sudo apt install libcapstone-dev\n
Install other dependencies:
sudo apt install libtbb-dev libzstd-dev libglfw3-dev libfreetype6-dev libgtk-3-dev\n
If you only build the command-line tool iree-tracy-capture
and not the graphical iree-tracy-profiler
, you can install only:
sudo apt install libtbb-dev libzstd-dev\n
The zstd version on Ubuntu 18.04 is old. You will need to install it from source from https://github.com/facebook/zstd.git
"},{"location":"developers/performance/profiling-with-tracy/#mac","title":"Mac","text":"If you haven't opted to build capstone-next
(see above section), install the system capstone
now:
brew install capstone\n
Install other dependencies:
brew install pkg-config glfw freetype tbb zstd\n
"},{"location":"developers/performance/profiling-with-tracy/#build-the-tracy-tools","title":"Build the Tracy tools","text":"A CMake-based build system for Tracy is maintained as part of IREE. In your IREE desktop build directory, set the following CMake option:
cmake -DIREE_BUILD_TRACY=ON -DIREE_ENABLE_LLD=ON .\n
That enables building the Tracy server tools, iree-tracy-profiler
and iree-tracy-capture
, introduced above. It also enables building the tool iree-tracy-csvexport
which can be used to export a captured trace as a CSV file (see Section 6 \"Exporting zone statistics to CSV\" in the Tracy manual).
If profiling on Android/ARM, you might need the patch discussed in the next paragraph.
Consider building without assertions (cmake -DIREE_ENABLE_ASSERTIONS=OFF
). At least iree-tracy-profiler
has some faulty assertions that can cause the profiler UI to crash during normal usage.
Rebuild, either everything or just these specific targets:
cmake --build . --target iree-tracy-profiler iree-tracy-capture iree-tracy-csvexport\n
This should have created the iree-tracy-profiler
, iree-tracy-capture
, and iree-tracy-csvexport
binaries:
$ find . -name iree-tracy-*\n./tracy/iree-tracy-profiler\n./tracy/iree-tracy-capture\n./tracy/iree-tracy-csvexport\n
"},{"location":"developers/performance/profiling-with-tracy/#build-the-iree-compiler-iree-compile","title":"Build the IREE compiler (iree-compile
)","text":"Most people don't need to rebuild iree-compile
at all for Tracy and can skip this section.
If you want to profile iree-compile
itself as opposed to just profiling modules compiled with it, then rebuild it with the CMake setting IREE_ENABLE_COMPILER_TRACING
set to ON
.
"},{"location":"developers/performance/profiling-with-tracy/#compile-your-iree-module-run-iree-compile","title":"Compile your IREE module (run iree-compile
)","text":"If you only want Instrumentation and not Sampling then you don't need anything particular here. Just run iree-compile
as usual.
"},{"location":"developers/performance/profiling-with-tracy/#additional-steps-for-sampling","title":"Additional steps for Sampling","text":"In order for Sampling to work with your compiled modules, add this flag to your iree-compile
command line: --iree-llvmcpu-link-embedded=false
.
For the llvm-cpu
target backend, sampling features also rely on debug information in the compiled module, enabled by --iree-llvmcpu-debug-symbols=true
, but that is currently the default.
When building IREE's own test and benchmark suites, if Tracy Sampling support is wanted, set the CMake setting IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER
to ON
. It has the effect of passing that --iree-llvmcpu-link-embedded=false
when compiling test/benchmark modules.
"},{"location":"developers/performance/profiling-with-tracy/#build-iree-device-binaries-with-tracy-instrumentation-clients","title":"Build IREE device binaries with Tracy instrumentation (\"clients\")","text":"Set the CMake setting IREE_ENABLE_RUNTIME_TRACING
to ON
and rebuild IREE device binaries, e.g.
cd iree-device-build-dir\ncmake -DIREE_ENABLE_RUNTIME_TRACING=ON .\ncmake --build .\n
Tip - python bindings
The iree-runtime
Python package includes instrumented tools too. Set the IREE_PY_RUNTIME=tracy
environment variable to use them:
$ python -m pip install iree-runtime\n$ IREE_PY_RUNTIME=tracy iree-run-module ...\n
See this section in the Python bindings documentation for more details.
"},{"location":"developers/performance/profiling-with-tracy/#additional-steps-for-sampling_1","title":"Additional steps for Sampling","text":"In order for Sampling features to work, make sure that binaries contain debug information. That usually means changing the CMAKE_BUILD_TYPE
to RelWithDebInfo
instead of Release
.
In your IREE device build directory, set the following CMake options:
cd iree-device-build-dir\ncmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .\n
"},{"location":"developers/performance/profiling-with-tracy/#running-the-profiled-program","title":"Running the profiled program","text":"The basic recipe is to just run your program as usual on the device and, while it is running, run iree-tracy-capture
on the host to connect to it.
In the typical case of a short-running benchmark, one usually runs with the environment variable TRACY_NO_EXIT
defined so that the benchmark does not exit until iree-tracy-capture
has connected to it.
Example:
TRACY_NO_EXIT=1 /data/local/tmp/iree-benchmark-module ... (usual flags)\n
"},{"location":"developers/performance/profiling-with-tracy/#additional-steps-for-sampling_2","title":"Additional steps for Sampling","text":"In order for Sampling to work, the IREE compiled module code mapping must still be accessible by the time Tracy tries to read symbols code. This requires setting the environment variable IREE_PRESERVE_DYLIB_TEMP_FILES
. It is easiest to set it to 1
but one may also set it to an explicit path where to preserve the temporary files.
Example:
TRACY_NO_EXIT=1 IREE_PRESERVE_DYLIB_TEMP_FILES=1 /data/local/tmp/iree-benchmark-module ... (usual flags)\n
Tracing doesn't work properly on VMs (see \"Problematic Platforms / Virtual Machines\" section 2.1.6.4 of the manual). To get sampling, you should run the profiled program on bare metal.
"},{"location":"developers/performance/profiling-with-tracy/#operating-system-settings-required-for-sampling-and-systrace","title":"Operating system settings required for Sampling and SysTrace","text":""},{"location":"developers/performance/profiling-with-tracy/#desktop-linux","title":"Desktop Linux","text":"On desktop Linux, the profiled application must be run as root, e.g. with sudo
. Otherwise, profile data will lack important components.
"},{"location":"developers/performance/profiling-with-tracy/#android","title":"Android","text":"When profiling on an Android device, in order to get the most useful information in the trace, tweak system permissions as follows before profiling. This needs to be done again after every reboot of the Android device.
From your desktop, get a shell on the Android device:
adb shell\n
The following commands are meant to be run from that Android device shell. First, get root access for this shell:
$ su\n#\n
Now run the following commands as root on the Android device:
setenforce 0\nmount -o remount,hidepid=0 /proc\necho 0 > /proc/sys/kernel/perf_event_paranoid\necho 0 > /proc/sys/kernel/kptr_restrict\n
Note: in order for this to work, the device needs to be rooted, which means that the above su
command must succeed. This is sometimes confused with the adb root
command, but that's not the same. adb root
restarts the adbd
daemon as root, which causes device shells to be root shells by default. This is unnecessary here and we don't recommend it: real Android applications never run as root, so Tracy/Android has to support running benchmarks as regular user and it's best to stick to this for the sake of realistic benchmarks. Internally, Tracy executes su
commands to perform certain actions, so it too relies on the device being rooted without relying on the benchmark process being run as root.
"},{"location":"developers/performance/profiling-with-tracy/#resource_exhausted-failed-to-open-file-issue","title":"\"RESOURCE_EXHAUSTED; failed to open file\" issue","text":"This is a known issue with how Tracy operates. One way to work around it is to manually increase the total number of files that can be kept opened simultaneously and run the benchmark command with that setting:
sudo sh -c \"ulimit -n <bigNum> && <myTracyInstrumentedProgram>\"\n
Explanation:
Tracy keeps a number of file descriptors open that, depending on the machine and its settings, may exceed the limit allowed by the system, resulting in iree
failing to open more files. In particular, it is common to have a relatively low limit when running with sudo
.
"},{"location":"developers/performance/profiling-with-tracy/#running-the-tracy-capture-cli-connecting-and-saving-profiles","title":"Running the Tracy Capture CLI, connecting and saving profiles","text":"While the program that you want to profile is still running (thanks to TRACY_NO_EXIT=1
), start the Tracy capture tool in another terminal. From the IREE build directory:
tracy/iree-tracy-capture -o myprofile.tracy\nConnecting to 127.0.0.1:8086...\n
It should connect to the IREE client and save the output to myprofile.tracy that can be visualized by the client below. You can start the capture tool first to make sure you don't miss any capture events.
Note that the connection uses TCP port 8086. If the Tracy-instrumented program is running on a separate machine, this port needs to be forwarded. In particular, when benchmarking on Android, this is needed:
adb forward tcp:8086 tcp:8086\n
"},{"location":"developers/performance/profiling-with-tracy/#running-the-tracy-profiler-ui-connecting-and-visualizing","title":"Running the Tracy profiler UI, connecting and visualizing","text":"If you have previously captured a tracy file (previous section), this command should succeed loading it (from the IREE build directory):
tracy/iree-tracy-profiler myprofile.tracy\n
Alternatively, while the program that you want to profile is still running (possibly thanks to TRACY_NO_EXIT=1
), the Tracy profiler can connect to it directly (so it is not required to capture the trace into a file): just running
tracy/iree-tracy-profiler\n
should show a dialog offering to connect to a client i.e. a profiled program:
If connecting doesn't work:
- If the profiled program is on a separate machine, make sure you've correctly set up port forwarding.
- On Android, the
adb forward
may need to be run again. - Make sure that the profiled program is still running. Do you need
TRACY_NO_EXIT=1
? - Kill the profiled program and restart it.
You should now start seeing a profile. The initial view should look like this:
Before going further, take a second to check that your recorded profile data has all the data that it should have. Permissions issues, as discussed above, could cause it to lack \"sampling\" or \"CPU data\" information, particularly on Android. For example, here is what the initial view looks like when one forgot to run the profiled program as root on Desktop Linux (where running as root is required, as explained above):
Notice how the latter screenshot is lacking the following elements:
- No 'CPU data' header on the left side, with the list of all CPU cores. The 'CPU usage' graph is something else.
- No 'ghost' icon next to the 'Main thread' header.
Click the 'Statistics' button at the top. It will open a window like this:
See how the above screenshot has two radio buttons at the top: 'Instrumentation' and 'Sampling'. At this point, if you don't see the 'Sampling' radio button, you need to resolve that first, as discussed above about possible permissions issues.
These 'Instrumentation' and 'Sampling' statistics correspond to the two kinds of data that Tracy collects about your program. In the Tracy main view, they correspond, respectively, to 'instrumentation' and 'ghost' zones. Refer to the Tracy PDF manual for a general introduction to these concepts. For each thread, the ghost icon toggles the view between these two kinds of zones.
Back to the main view, look for the part of the timeline that is of interest to you. Your area of interest might not be on the Main thread. In fact, it might be on a thread that's not visible in the initial view at all. To pan around with the mouse, hold the right mouse button down (or its keyboard equivalent on macOS). Alternatively, look for the 'Frame' control at the top of the Tracy window. Use the 'next frame' arrow button until more interesting threads appear.
IREE module code tends to run on a thread whose name contains the word worker
.
Once you have identified the thread of interest, you typically want to click its ghost icon to view its \"ghost\" (i.e. sampling) zones.
Here is what you should get when clicking on a ghost zone:
The percentages column to the left of the disassembly shows where time is being spent. This is unique to the sampling data (ghost zones) and has no equivalent in the instrumentation data (instrumentation zones). Here is what we get clicking on the corresponding instrumentation zone:
This still has a 'Source' button but that only shows the last C++ caller that had explicit Tracy information, so here we see a file under iree/hal
whereas the Ghost zone saw into the IREE compiled module that it calls into, with the source view pointing to the .mlir
file.
"},{"location":"developers/performance/profiling-with-tracy/#configuring-tracy-instrumentation","title":"Configuring Tracy instrumentation","text":"Set IREE's IREE_TRACING_MODE
value (defined in iree/base/tracing.h) to adjust which tracing features, such as allocation tracking and callstacks, are enabled.
"},{"location":"developers/performance/profiling/","title":"Profiling overview","text":"IREE benchmarking gives us an accurate and reproducible view of program performance at specific levels of granularity. To analyze system behavior in more depth, there are various ways to profile IREE.
"},{"location":"developers/performance/profiling/#cpu-cache-and-other-cpu-event-profiling","title":"CPU cache and other CPU event profiling","text":"For some advanced CPU profiling needs such as querying CPU cache and other events, one may need to use some OS-specific profilers. See Profiling CPUs.
"},{"location":"developers/performance/profiling/#vulkan-gpu-profiling","title":"Vulkan GPU Profiling","text":"Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used. Refer to Profiling GPUs using Vulkan.
"},{"location":"developers/performance/profiling/#tracy","title":"Tracy","text":"Tracy is a profiler that's been used for a wide range of profiling tasks on IREE. Refer to Profiling with Tracy.
"},{"location":"guides/","title":"Guides","text":""},{"location":"guides/#ml-frameworks","title":"ML frameworks","text":"Start here: ML frameworks overview
Guides for specific frameworks:
- TensorFlow and TensorFlow Lite
- JAX
- PyTorch
"},{"location":"guides/#deployment-configurations","title":"Deployment configurations","text":"Start here: Deployment configurations overview
Guides for specific configurations:
- CPU for general purpose CPU deployment
- CPU - Bare-Metal with minimal platform dependencies
- GPU - Vulkan for cross-platform usage and interop with graphics applications
- GPU - CUDA for NVIDIA-specific solutions
- GPU - ROCm for AMD-specific solutions
- GPU - Metal for running on Apple hardware
"},{"location":"guides/deployment-configurations/","title":"Deployment configurations","text":"IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE to load programs on demand and to take advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.
"},{"location":"guides/deployment-configurations/#stable-configurations","title":"Stable configurations","text":" - CPU for general purpose CPU deployment
- CPU - Bare-Metal with minimal platform dependencies
- GPU - Vulkan for cross-platform usage and interop with graphics applications
- GPU - CUDA for NVIDIA-specific solutions
- GPU - ROCm for AMD-specific solutions
- GPU - Metal for running on Apple hardware
These are just the most stable configurations IREE supports. Feel free to reach out on any of IREE's communication channels if you have questions about a specific platform, hardware accelerator, or set of system features.
"},{"location":"guides/deployment-configurations/#compiler-target-backends","title":"Compiler target backends","text":"Compiler target backends are used to generate executable code for hardware APIs and device architectures. Compiler targets may implement special optimizations or generate distinct code for certain device/architecture/performance profiles.
When compiling programs, a list of target backends must be specified via
--iree-hal-target-backends=
(command-line) target_backends=[...]
(Python)
Target backend Description Compatible HAL devices llvm-cpu
Code generation for CPU-like devices supported by LLVM local-sync
, local-task
vmvx
Portable interpreter powered by a microkernel library local-sync
, local-task
vulkan
orvulkan-spirv
Portable GPU support via SPIR-V for Vulkan vulkan
cuda
NVIDIA GPU support via PTX for CUDA cuda
metal
ormetal-spirv
GPU support on Apple platforms via MSL for Metal metal
rocm
Experimental AMD GPU support via HSACO for ROCm rocm
webgpu-wgsl
Experimental GPU support on the Web via WGSL for WebGPU webgpu
Tip - listing available backends
The list of compiler target backends can be queried:
Command-linePython bindings $ iree-compile --iree-hal-list-target-backends\n\nRegistered target backends:\n cuda\n llvm-cpu\n metal\n metal-spirv\n rocm\n vmvx\n vmvx-inline\n vulkan\n vulkan-spirv\n
iree.compiler.query_available_targets()\n\n['cuda',\n 'llvm-cpu',\n 'metal',\n 'metal-spirv',\n 'rocm',\n 'vmvx',\n 'vmvx-inline',\n 'vulkan',\n 'vulkan-spirv']\n
"},{"location":"guides/deployment-configurations/#runtime-hal-driversdevices","title":"Runtime HAL drivers/devices","text":"Runtime HAL devices call into hardware APIs to load and run executable code. Devices may use multithreading or other system resources, depending on their focus and the build configuration.
HAL device Description local-sync
Synchronous local CPU device with inline execution local-task
Multithreaded local CPU device using a 'task' executor vulkan
Portable GPU execution using the Vulkan API cuda
NVIDIA GPU execution using CUDA metal
GPU execution on Apple platforms using Metal rocm
Experimental AMD GPU execution using ROCm webgpu
Experimental GPU execution on the web using WebGPU Additional HAL drivers can also be defined external to the core project via IREE_EXTERNAL_HAL_DRIVERS
.
"},{"location":"guides/deployment-configurations/bare-metal/","title":"Running on a bare-metal platform","text":"IREE supports model execution via CPU on bare-metal platforms. Bare metal platforms have no operating system support, and executables are built using machine-specific linker scripts and/or board support packages (BSPs).
Bare-metal deployment typically uses IREE's LLVM compiler target backend much like the CPU configuration, but using a limited subset of IREE's CPU HAL driver code at runtime to load and execute compiled programs.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#prerequisites","title":"Prerequisites","text":"Out-of-tree bare-metal platform tools and source code for the system should be ready, such as
- Compilation toolchain
- Platform linker script
- Firmware libraries
Please follow the instructions to retrieve the IREE compiler.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compile-the-model-for-bare-metal","title":"Compile the model for bare-metal","text":"The model can be compiled with the following command:
iree-compile \\\n--iree-stream-partitioning-favor=min-peak-memory \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-llvmcpu-target-triple=x86_64-pc-linux-elf \\\n--iree-llvmcpu-debug-symbols=false \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_cpu.vmfb\n
In which
--iree-stream-partitioning-favor=min-peak-memory
: Optimize for minimum peak memory usage at the cost of concurrency - include when targeting single-threaded execution to reduce memory consumption. --iree-hal-target-backends=llvm-cpu
: Compile using the LLVM CPU target --iree-llvmcpu-target-triple
: Use the <arch>-pc-linux-elf
LLVM target triple so the artifact has a fixed ABI to be rendered by the elf_module library --iree-llvmcpu-debug-symbols=false
: To reduce the artifact size
See generate.sh for example command-line instructions of some common architectures.
You can replace the MLIR file with the other MLIR model files, following the instructions.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compiling-the-bare-metal-model-for-static-library-support","title":"Compiling the bare-metal model for static-library support","text":"See the static_library demo sample for an example and instructions on running a model with IREE's static_library_loader
.
By default, the demo targets the host machine when compiling. To produce a bare-metal compatible model, run iree-compile
as in the previous example and add the additional -iree-llvmcpu-static-library-output-path=
flag to specify the static library destination. This will produce a .h\\.o
file to link directly into the target application.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#build-bare-metal-runtime-from-source","title":"Build bare-metal runtime from source","text":"A few CMake options and macros should be set to build a subset of IREE runtime libraries compatible with the bare-metal platform. We assume there's no multi-thread control nor system library support in the bare-metal system. The model execution is in a single-thread synchronous fashion.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#set-cmake-options","title":"Set CMake options","text":"# Build the IREE runtime only\nset(IREE_BUILD_COMPILER OFF)\n\n# Tell CMake to skip targeting a specific operating system\nset(CMAKE_SYSTEM_NAME Generic)\n\n# Disable multi-thread library support\nset(IREE_ENABLE_THREADING OFF)\n\n# Only enable the local synchronous HAL driver\nset(IREE_HAL_DRIVER_DEFAULTS OFF)\nset(IREE_HAL_DRIVER_LOCAL_SYNC ON)\n\n# Only enable some executable loaders\nset(IREE_HAL_EXECUTABLE_LOADER_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF ON)\nset(IREE_HAL_EXECUTABLE_LOADER_VMVX_MODULE ON)\n\n# Only enable the embedded ELF executable plugin\nset(IREE_HAL_EXECUTABLE_PLUGIN_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_PLUGIN_EMBEDDED_ELF ON)\n\n# Disable tests until IREE supports running them on bare-metal platforms\nset(IREE_BUILD_TESTS OFF)\n\n# Build samples\nset(IREE_BUILD_SAMPLES ON)\n
Todo
Clean the list up after #6353 is fixed.
Also, set the toolchain-specific cmake file to match the tool path, target architecture, target abi, linker script, system library path, etc.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#define-iree-macros","title":"Define IREE macros","text":"These macros should be defined, either in C/C++ or via CMake options like
set(MY_FLAGS \"-DIREE_PLATFORM_GENERIC=1\")\nset(CMAKE_C_FLAGS ${MY_FLAGS} ${CMAKE_C_FLAGS})\nset(CMAKE_CXX_FLAGS ${MY_FLAGS} ${CMAKE_CXX_FLAGS})\n
Macro Description IREE_PLATFORM_GENERIC
Let IREE build the runtime library without targeting a specific platform. IREE_SYNCHRONIZATION_DISABLE_UNSAFE=1
Disable thread synchronization support.Must only be used if there's a single thread. IREE_FILE_IO_ENABLE=0
Disable file I/O. IREE_TIME_NOW_FN
A function to return the system time. For the bare-metal systems, it can be set as IREE_TIME_NOW_FN=\\\"\\{ return 0;\\}\\\"
as there's no asynchronous wait handling. IREE_WAIT_UNTIL_FN
A function to wait until the given time in nanoseconds. Must match the signature bool(uint64_t nanos)
and return false if the wait failed. Examples of how to setup the CMakeLists.txt and .cmake file:
- IREE RISC-V toolchain cmake
- IREE Bare-Metal Arm Sample
- IREE Bare-Metal RV32 Sample
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#bare-metal-execution-example","title":"Bare-metal execution example","text":"See simple_embedding for generic platform to see how to use the IREE runtime library to build/run the IREE model for the bare-metal target.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/","title":"CPU deployment","text":"IREE supports efficient program execution on CPU devices by using LLVM to compile all dense computations in each program into highly optimized CPU native instruction streams, which are embedded in one of IREE's deployable formats.
To compile a program for CPU execution, pick one of IREE's supported executable formats:
Executable Format Description embedded ELF portable, high performance dynamic library system library platform-specific dynamic library (.so, .dll, etc.) VMVX reference target At runtime, CPU executables can be loaded using one of IREE's CPU HAL drivers:
local-task
: asynchronous, multithreaded driver built on IREE's \"task\" system local-sync
: synchronous, single-threaded driver that executes work inline
Todo
Add IREE's CPU support matrix: what architectures are supported; what architectures are well optimized; etc.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core iree-compiler
package includes the LLVM-based CPU compiler:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install iree-compiler\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade iree-compiler\n
Tip
iree-compile
is installed to your python module installation path. If you pip install with the user mode, it is under ${HOME}/.local/bin
, or %APPDATA%Python
on Windows. You may want to include the path in your system's PATH
environment variable:
export PATH=${HOME}/.local/bin:${PATH}\n
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation or iOS cross-compilation page if you are cross compiling for a mobile device. The llvm-cpu
compiler backend is compiled in by default on all platforms.
Ensure that the IREE_TARGET_BACKEND_LLVM_CPU
CMake option is ON
when configuring for the host.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-runtime","title":"Get the IREE runtime","text":"You will need to get an IREE runtime that supports the local CPU HAL driver, along with the appropriate executable loaders for your application.
You can check for CPU support by looking for the local-sync
and local-task
drivers:
$ iree-run-module --list_drivers\n\n cuda: CUDA (dynamic)\n local-sync: Local execution using a lightweight inline synchronous queue\n local-task: Local execution using the IREE multithreading task system\n vulkan: Vulkan 1.x (dynamic)\n
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. The local CPU HAL drivers are compiled in by default on all platforms.
Ensure that the IREE_HAL_DRIVER_LOCAL_TASK
and IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF
(or other executable loader) CMake options are ON
when configuring for the target.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-and-run-a-program","title":"Compile and run a program","text":"With the requirements out of the way, we can now compile a model and run it.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run the following command to compile with the llvm-cpu
target:
iree-compile \\\n--iree-hal-target-backends=llvm-cpu \\\nmobilenet_iree_input.mlir -o mobilenet_cpu.vmfb\n
Tip - CPU targets
The --iree-llvmcpu-target-triple
flag tells the compiler to generate code for a specific type of CPU. You can see the list of supported targets with iree-compile --iree-llvmcpu-list-targets
, or pass \"host\" to let LLVM infer the triple from your host machine (e.g. x86_64-linux-gnu
).
$ iree-compile --iree-llvmcpu-list-targets\n\n Registered Targets:\n aarch64 - AArch64 (little endian)\n aarch64_32 - AArch64 (little endian ILP32)\n aarch64_be - AArch64 (big endian)\n arm - ARM\n arm64 - ARM64 (little endian)\n arm64_32 - ARM64 (little endian ILP32)\n armeb - ARM (big endian)\n riscv32 - 32-bit RISC-V\n riscv64 - 64-bit RISC-V\n wasm32 - WebAssembly 32-bit\n wasm64 - WebAssembly 64-bit\n x86 - 32-bit X86: Pentium-Pro and above\n x86-64 - 64-bit X86: EM64T and AMD64\n
Tip - CPU features
The --iree-llvmcpu-target-cpu-features
flag tells the compiler to generate code using certain CPU \"features\", like SIMD instruction sets. Like the target triple, you can pass \"host\" to this flag to let LLVM infer the features supported by your host machine.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#run-a-compiled-program","title":"Run a compiled program","text":"In the build directory, run the following command:
tools/iree-run-module \\\n--device=local-task \\\n--module=mobilenet_cpu.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
","tags":["CPU"]},{"location":"guides/deployment-configurations/gpu-cuda/","title":"GPU deployment using CUDA","text":"IREE can accelerate model execution on Nvidia GPUs using CUDA.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#prerequisites","title":"Prerequisites","text":"In order to use CUDA to drive the GPU, you need to have a functional CUDA environment. It can be verified by the following steps:
nvidia-smi | grep CUDA\n
If nvidia-smi
does not exist, you will need to install the latest CUDA Toolkit SDK.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core iree-compiler
package includes the CUDA compiler:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install iree-compiler\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade iree-compiler\n
Tip
iree-compile
is installed to your python module installation path. If you pip install with the user mode, it is under ${HOME}/.local/bin
, or %APPDATA%Python
on Windows. You may want to include the path in your system's PATH
environment variable:
export PATH=${HOME}/.local/bin:${PATH}\n
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build the IREE compiler, then enable the CUDA compiler target with the IREE_TARGET_BACKEND_CUDA
option.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-runtime","title":"Get the IREE runtime","text":"Next you will need to get an IREE runtime that includes the CUDA HAL driver.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE from source, then enable the CUDA HAL driver with the IREE_HAL_DRIVER_CUDA
option.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"With the compiler and runtime ready, we can now compile programs and run them on GPUs.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run one of the following commands to compile:
iree-compile \\\n--iree-hal-target-backends=cuda \\\n--iree-hal-cuda-llvm-target-arch=<...> \\\nmobilenet_iree_input.mlir -o mobilenet_cuda.vmfb\n
Note that a cuda target architecture (iree-hal-cuda-llvm-target-arch
) of the form sm_<arch_number>
is needed to compile towards each GPU architecture. If no architecture is specified then we will default to sm_35
.
Here is a table of commonly used architectures:
CUDA GPU Target Architecture Nvidia K80 sm_35
Nvidia P100 sm_60
Nvidia V100 sm_70
Nvidia A100 sm_80
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#run-a-compiled-program","title":"Run a compiled program","text":"Run the following command:
iree-run-module \\\n--device=cuda \\\n--module=mobilenet_cuda.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-metal/","title":"GPU deployment using Metal","text":"Documentation coming soon!
","tags":["GPU","iOS"]},{"location":"guides/deployment-configurations/gpu-rocm/","title":"GPU deployment using ROCm","text":"IREE can accelerate model execution on AMD GPUs using ROCm.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#prerequisites","title":"Prerequisites","text":"In order to use ROCm to drive the GPU, you need to have a functional ROCm environment. It can be verified by the following steps:
rocm-smi | grep rocm\n
If rocm-smi
does not exist, you will need to install the latest ROCm Toolkit SDK for Windows or Linux.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Currently ROCm is NOT supported for the Python interface.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build the IREE compiler, then enable the ROCm compiler target with the IREE_TARGET_BACKEND_ROCM
option.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-runtime","title":"Get the IREE runtime","text":"Next you will need to get an IREE runtime that includes the ROCm HAL driver.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE from source, then enable the experimental ROCm HAL driver with the IREE_EXTERNAL_HAL_DRIVERS=rocm
option.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"With the compiler and runtime ready, we can now compile programs and run them on GPUs.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run one of the following commands to compile:
iree-compile \\\n--iree-hal-target-backends=rocm \\\n--iree-rocm-target-chip=<...> \\\n--iree-rocm-link-bc=true \\\n--iree-rocm-bc-dir=<...> \\\nmobilenet_iree_input.mlir -o mobilenet_rocm.vmfb\n
Note ROCm Bitcode Dir (iree-rocm-bc-dir
) path is required. If the system you are compiling IREE in has ROCm installed, then the default value of /opt/rocm/amdgcn/bitcode
will usually suffice. If you intend to build the ROCm compiler on a non-ROCm-capable system, please set iree-rocm-bc-dir
to the absolute path where you might have saved the amdgcn bitcode.
Note that a ROCm target chip (iree-rocm-target-chip
) of the form gfx<arch_number>
is needed to compile towards each GPU architecture. If no architecture is specified then we will default to gfx908
.
Here is a table of commonly used architectures:
AMD GPU Target Chip AMD MI25 gfx900
AMD MI50 gfx906
AMD MI60 gfx906
AMD MI100 gfx908
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#run-a-compiled-program","title":"Run a compiled program","text":"Run the following command:
iree-run-module \\\n--device=rocm \\\n--module=mobilenet_rocm.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-vulkan/","title":"GPU deployment using Vulkan","text":"IREE can accelerate model execution on GPUs via Vulkan, a low-overhead graphics and compute API. Vulkan is cross-platform: it is available on many operating systems, including Android, Linux, and Windows. Vulkan is also cross-vendor: it is supported by most GPU vendors, including AMD, ARM, Intel, NVIDIA, and Qualcomm.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#support-matrix","title":"Support matrix","text":"As IREE and the compiler ecosystem it operates within matures, more target specific optimizations will be implemented. At this stage, expect reasonable performance across all GPUs and for improvements to be made over time for specific vendors and architectures.
GPU Vendor Category Performance Focus Architecture ARM Mali GPU Mobile Good Valhall Qualcomm Adreno GPU Mobile Reasonable 640+ AMD GPU Desktop/server Reasonable - NVIDIA GPU Desktop/server Good -","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#prerequisites","title":"Prerequisites","text":"In order to use Vulkan to drive the GPU, you need to have a functional Vulkan environment. IREE requires Vulkan 1.1 on Android and 1.2 elsewhere. It can be verified by the following steps:
Android Linux Windows Android mandates Vulkan 1.1 support since Android 10. You just need to make sure the device's Android version is 10 or higher.
Run the following command in a shell:
vulkaninfo | grep apiVersion\n
If vulkaninfo
does not exist, you will need to install the latest Vulkan SDK. Installing via LunarG's package repository is recommended, as it places Vulkan libraries and tools under system paths so it's easy to discover.
If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.
Run the following command in a shell:
vulkaninfo | grep apiVersion\n
If vulkaninfo
does not exist, you will need to install the latest Vulkan SDK.
If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-compiler","title":"Get the IREE compiler","text":"Vulkan expects the program running on GPU to be expressed by the SPIR-V binary exchange format, which the model must be compiled into.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core iree-compiler
package includes the SPIR-V compiler:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install iree-compiler\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade iree-compiler\n
Tip
iree-compile
is installed to your python module installation path. If you pip install with the user mode, it is under ${HOME}/.local/bin
, or %APPDATA%\\Python
on Windows. You may want to include the path in your system's PATH
environment variable:
export PATH=${HOME}/.local/bin:${PATH}\n
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. The SPIR-V compiler backend is compiled in by default on all platforms.
Ensure that the IREE_TARGET_BACKEND_VULKAN_SPIRV
CMake option is ON
when configuring for the host.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-runtime","title":"Get the IREE runtime","text":"Next you will need to get an IREE runtime that supports the Vulkan HAL driver.
You can check for Vulkan support by looking for a matching driver and device:
$ iree-run-module --list_drivers\n\n cuda: CUDA (dynamic)\n local-sync: Local execution using a lightweight inline synchronous queue\n local-task: Local execution using the IREE multithreading task system\n vulkan: Vulkan 1.x (dynamic)\n
$ iree-run-module --list_devices\n\n cuda://GPU-00000000-1111-2222-3333-444444444444\n local-sync://\n local-task://\n vulkan://00000000-1111-2222-3333-444444444444\n
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE for Linux/Windows and the Android cross-compilation page for Android. The Vulkan HAL driver is compiled in by default on non-Apple platforms.
Ensure that the IREE_HAL_DRIVER_VULKAN
CMake option is ON
when configuring for the target.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-and-run-a-program","title":"Compile and run a program","text":"With the SPIR-V compiler and Vulkan runtime, we can now compile programs and run them on GPUs.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run the following command to compile with the vulkan-spirv
target:
iree-compile \\\n--iree-hal-target-backends=vulkan-spirv \\\n--iree-vulkan-target-triple=<...> \\\nmobilenet_iree_input.mlir -o mobilenet_vulkan.vmfb\n
Note
A target triple of the form <vendor/arch>-<product>-<os>
is needed to compile towards each GPU architecture. If no triple is specified then a safe but more limited default will be used. We don't support the full spectrum here1; the following table summarizes the currently recognized ones:
GPU Vendor Target Triple ARM Mali GPU e.g., valhall-g78-android30
Qualcomm Adreno GPU e.g., adreno-unknown-android30
AMD GPU e.g., rdna1-5700xt-linux
NVIDIA GPU e.g., ampere-rtx3080-windows
SwiftShader CPU cpu-swiftshader-unknown
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#run-a-compiled-program","title":"Run a compiled program","text":"In the build directory, run the following command:
tools/iree-run-module \\\n--device=vulkan \\\n--module=mobilenet_vulkan.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
-
It's also impossible to capture all details of a Vulkan implementation with a target triple, given the allowed variances on extensions, properties, limits, etc. So the target triple is just an approximation for usage.\u00a0\u21a9
","tags":["GPU","Vulkan"]},{"location":"guides/ml-frameworks/","title":"ML frameworks","text":"IREE supports popular machine learning frameworks using the same underlying technology.
graph LR\n accTitle: ML framework to runtime deployment workflow overview\n accDescr {\n Programs start in some ML framework.\n Programs are imported into MLIR.\n The IREE compiler uses the imported MLIR.\n Compiled programs are used by the runtime.\n }\n\n A[ML frameworks]\n B[Imported MLIR]\n C[IREE compiler]\n D[Runtime deployment]\n\n A --> B\n B --> C\n C --> D
"},{"location":"guides/ml-frameworks/#supported-frameworks","title":"Supported frameworks","text":"See end-to-end examples of how to use each framework with IREE:
- TensorFlow and TensorFlow Lite
- JAX
- PyTorch
Importing from other frameworks is planned - stay tuned!
"},{"location":"guides/ml-frameworks/#samples","title":"Samples","text":"Check out the samples in IREE's samples/
directory, as well as the iree-samples repository.
"},{"location":"guides/ml-frameworks/#exportimport","title":"Export/Import","text":"Each machine learning framework has some \"export\" mechanism that snapshots the structure and data in your program. These exported programs can then be \"imported\" into IREE's compiler by using either a stable import format or one of IREE's importer tools.
This export/import process is specific to each frontend and typically involves a number of stages:
- Capture/trace/freeze the ML model into a graph
- Write that graph to an interchange format (e.g. SavedModel, TorchScript)
- Load the saved program into an import tool and convert to MLIR
- Legalize the graph's operations so only IREE-compatible operations remain
- Write the imported MLIR to a file
This fully imported form can then be compiled independently of the source language and framework.
"},{"location":"guides/ml-frameworks/#compilation","title":"Compilation","text":"IREE compiles MLIR files for specified sets of backends (CPU, GPU, etc). Each backend generates optimized native code custom to the input program and intended target platform. Once compiled, modules can be executed using IREE's runtime.
See the deployment configuration guides for details on selecting a compiler backend and tuning options for your choice of target platform(s) or device(s).
"},{"location":"guides/ml-frameworks/#execution","title":"Execution","text":"Compiled modules can be executed by selecting what compute devices to use, loading the module, and then executing it with the intended inputs. IREE provides several language bindings for its runtime API.
"},{"location":"guides/ml-frameworks/jax/","title":"JAX integration","text":"Note
IREE's JAX support is under active development. This page is still under construction.
","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/jax/#overview","title":"Overview","text":"IREE offers two ways to interface with JAX programs:
- An API for extracting and compiling full models ahead of time (AOT) for execution apart from JAX. This API is being developed in the iree-org/iree-jax repository.
- A PJRT plugin that adapts IREE as a native JAX backend for online / just in time (JIT) use. This plugin is being developed in the openxla/openxla-pjrt-plugin repository.
","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/pytorch/","title":"PyTorch + IREE =","text":"Caution - under development
We are still validating and fixing specific models. Between bug fixes in flight and releases running behind, we don't expect that you will be able to do a lot of advanced things without using nightly releases or working with us.
Stay tuned and join the discussion in our Discord server's #pytorch
channel.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#overview","title":"Overview","text":"SHARK-Turbine offers a tight integration between compatible versions of IREE, torch-mlir, and PyTorch.
- Seamless integration with standard PyTorch workflows
- Deployment support for running PyTorch models on cloud and edge devices
- General purpose model compilation and execution tools
Both just-in-time (JIT) and ahead-of-time (AOT) workflows are supported:
graph LR\n accTitle: PyTorch integration overview\n accDescr {\n PyTorch programs can be optimized within a Python session with\n SHARK-Turbine's just-in-time tools.\n PyTorch programs can be exported out of Python to native binaries using\n SHARK-Turbine's ahead-of-time export toolkit.\n }\n\n subgraph Python\n pytorch(PyTorch)\n subgraph turbine [SHARK-Turbine]\n jit(\"Eager execution (JIT)\")\n aot(\"Export toolkit (AOT)\")\n end\n\n pytorch --> jit\n jit --> pytorch\n pytorch --> aot\n end\n\n subgraph Native\n binary([\"binary (.vmfb)\"])\n end\n\n aot -.-> binary
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#prerequisites","title":"Prerequisites","text":"Install Turbine and its requirements:
python -m pip install shark-turbine\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#just-in-time-jit-execution","title":"Just-in-time (JIT) execution","text":"Just-in-time integration allows for Python code using TorchDynamo to optimize PyTorch models/functions using IREE, all within an interactive Python session.
graph TD\n accTitle: PyTorch JIT workflow overview\n accDescr {\n Programs start as either PyTorch nn.Module objects or callable functions.\n Programs are compiled into optimized modules using torch.compile.\n Within torch.compile, Dynamo runs the program through Turbine and IREE.\n }\n\n subgraph Python\n input([nn.Module / function])\n\n subgraph compile [\"torch.compile()\"]\n direction LR\n dynamo{{TorchDynamo}}\n turbine{{SHARK-Turbine}}\n iree{{IREE}}\n dynamo --> turbine --> iree\n end\n\n output([Optimized module])\n input --> compile --> output\n end
For deployment outside of Python, see the ahead-of-time sections below.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#quickstart","title":"Quickstart","text":"Turbine integrates into PyTorch as a custom backend for torch.compile
.
Behind the scenes, PyTorch captures the structure of the input model into a computation graph and feeds that graph through to the selected backend compiler.
import torch\n\n# Define the `nn.Module` or Python function to run.\nclass LinearModule(torch.nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n def forward(self, input):\n return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Compile the program using the turbine backend.(1)\nopt_linear_module = torch.compile(linear_module, backend=\"turbine_cpu\")\n\n# Use the compiled program as you would the original program.\nargs = torch.randn(4)\nturbine_output = opt_linear_module(args)\n
- Initial integration only supports CPU, but support for many of IREE's other targets is coming soon.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples","title":"Samples","text":"Code samples JIT compilation notebook Simple MLP eager examples/eager_mlp/mlp_eager_simple.py
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#ahead-of-time-aot-export","title":"Ahead-of-time (AOT) export","text":"The ahead-of-time toolkit allows developers to define a program's structure in Python and then export deployment-ready artifacts that can be used in IREE's deployment configurations via the API bindings.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#simple-api","title":"Simple API","text":"For simple models, a one-shot export API is available.
graph LR\n accTitle: PyTorch simple AOT workflow overview\n accDescr {\n Programs start as PyTorch nn.Module objects.\n Modules are exported using the \"aot\" API.\n Exported outputs are then compiled to .vmfb files with executable binaries.\n }\n\n subgraph Python\n input([nn.Module])\n export([\"ExportOutput (MLIR)\"])\n input -- \"aot.export()\" --> export\n end\n\n subgraph Native\n binary([\"binary (.vmfb)\"])\n end\n\n export -. \"compile()\" .-> binary
import iree.runtime as ireert\nimport numpy as np\nimport shark_turbine.aot as aot\nimport torch\n\n# Define the `nn.Module` to export.\nclass LinearModule(torch.nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n def forward(self, input):\n return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Export the program using the simple API.\nexample_arg = torch.randn(4)\nexport_output = aot.export(linear_module, example_arg)\n\n# Compile to a deployable artifact.\nbinary = export_output.compile(save_to=None)\n\n# Use the IREE runtime API to test the compiled program.\nconfig = ireert.Config(\"local-task\")\nvm_module = ireert.load_vm_module(\n ireert.VmModule.wrap_buffer(config.vm_instance, binary.map_memory()),\n config,\n)\ninput = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)\nresult = vm_module.main(input)\nprint(result.to_host())\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_1","title":"Samples","text":"Code samples Simple AOT export notebook Simple MLP export examples/aot_mlp/mlp_export_simple.py
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#advanced-api","title":"Advanced API","text":"For more complex models, an underlying advanced API is available that gives access to more features.
graph LR\n accTitle: PyTorch advanced AOT workflow overview\n accDescr {\n Programs are represented using the aot.CompiledModule class.\n CompiledModules can extend nn.Module objects, export globals, and set\n shapes and dtypes for each function.\n Modules are exported using the \"aot\" API.\n Exported outputs are then compiled to .vmfb files with executable binaries.\n }\n\n subgraph Python\n compiledmodule(\"aot.CompiledModule\\n\\n- extend nn.Module\\n- export globals\\n- set shapes/dtypes\")\n export([\"ExportOutput (MLIR)\"])\n compiledmodule -- \"aot.export()\" --> export\n end\n\n subgraph Native\n binary([\"binary (.vmfb)\"])\n end\n\n export -. \"compile()\" .-> binary
Advanced export workflows can use the aot.CompiledModule
class to define and constrain the structure of a program prior to compiling it.
import shark_turbine.aot as aot\n\n# A minimal program, with no functions or variables.\nclass BasicModule(aot.CompiledModule):\n ...\n\n# Create an instance of the program and convert it to MLIR.\nfrom iree.compiler.ir import Context\ninstance = BasicModule(context=Context())\nmodule_str = str(aot.CompiledModule.get_mlir_module(instance))\n\nprint(module_str)\n# module @basic {\n# }\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#exporting-functions","title":"Exporting functions","text":"Exported functions are the API entry points into a compiled program.
Simple feed-forward neural networks used for inference may have a single exported function (typically called \"forward\"), while more complex programs can have multiple computation functions, initialization functions, \"backward\" methods for training, state management functions, debugging functions, etc.
-
Each instance method on a aot.CompiledModule
-derived class is exported. These instance methods can include calls to other aot
components, such as aot.jittable
compute functions:
class GetOnesModule(aot.CompiledModule):\n @aot.jittable\n def compute_ones():\n return torch.ones(3)\n\n def get_ones(self):\n return self.compute_ones()\n
-
Instance methods can use aot.AbstractTensor
to specify data types:
class IntSumModule(aot.CompiledModule):\n @aot.jittable\n def compute_sum(a, b):\n return a + b\n\n def sum_int32(\n self,\na=aot.AbstractTensor(2, dtype=torch.int32),\nb=aot.AbstractTensor(2, dtype=torch.int32),\n):\n return self.compute_sum(a, b)\n
-
Shapes can be made dynamic using aot.AbstractTensor
and aot.jittable
constraints:
class DynamicSumModule(aot.CompiledModule):\n @aot.jittable\n def compute_sum(a, b):\n return a + b\n\n def sum_dynamic(\n self,\na=aot.AbstractTensor(None),\nb=aot.AbstractTensor(None),\n):\n return self.compute_sum(\n a,\n b,\nconstraints=[\na.dynamic_dim(0) == b.dynamic_dim(0),\n],\n)\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#global-variables","title":"Global variables","text":"Global variables are used to represent persistent state within a program instance.
For example, they can be used to represent the weights and biases in a neural network, and exporting these as mutable variables can allow for setting their values independently at runtime.
-
Individual globals can be exported using aot.export_global()
:
state_example = torch.tensor(0, dtype=torch.int32)\nupdate_example = torch.tensor(0, dtype=torch.int32)\n\nclass SampleModule(aot.CompiledModule):\n value = aot.export_global(state_example, mutable=True)\n\n def get_value(self):\n return self.value\n\n def update_value(self, new_value=aot.abstractify(update_example)):\n self.value = new_value\n
-
All named parameters on a nn.Module
can be exported using export_parameters()
:
class SimpleParams(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.classifier = torch.nn.Linear(20, 30)\n\n def forward(self, x):\n return self.classifier(x)\n\nm = SimpleParams()\n\nclass SimpleParamsModule(aot.CompiledModule):\nparams = aot.export_parameters(m)\ncompute = aot.jittable(m.forward)\n\n def run(self, x=aot.AbstractTensor(128, 20)):\n return self.compute(x)\n\n# torch.nn.Linear has 'weight' and 'bias' variables:\n# https://pytorch.org/docs/stable/generated/torch.nn.Linear.html\n# Add getters for both exported parameters.\ndef get_weight(self):\nreturn self.params[\"classifier.weight\"]\ndef get_bias(self):\nreturn self.params[\"classifier.bias\"]\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_2","title":"Samples","text":"Code samples Advanced AOT export notebook PyTorch dynamic shapes notebook Unit tests tests/aot/
Dynamic MLP export examples/aot_mlp/mlp_export_dynamic.py
llama2 inference example examples/llama2_inference/stateless_llama.py
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#alternate-workflows","title":"Alternate workflows","text":"Caution - These are due for migration to SHARK-Turbine.
Code samples (Deprecated) Inference on BERT","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#native-on-device-training","title":"Native / on-device training","text":"A small (~100-250KB), self-contained binary can be built for deploying to resource-constrained environments without a Python interpreter.
Example scripts Basic Inference and Training Example Native On-device Training Example","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/tensorflow/","title":"TensorFlow integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#overview","title":"Overview","text":"IREE supports compiling and running TensorFlow programs represented as tf.Module
classes or stored in the SavedModel
format.
graph LR\n accTitle: TensorFlow to runtime deployment workflow overview\n accDescr {\n Programs start as either TensorFlow SavedModel or tf.Module programs.\n Programs are imported into MLIR as StableHLO.\n The IREE compiler uses the imported MLIR.\n Compiled programs are used by the runtime.\n }\n\n subgraph A[TensorFlow]\n direction TB\n A1[SavedModel]\n A2[tf.Module]\n\n A1 --- A2\n end\n\n subgraph B[MLIR]\n B1[StableHLO]\n end\n\n C[IREE compiler]\n D[Runtime deployment]\n\n A -- iree-import-tf --> B\n B --> C\n C --> D
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#prerequisites","title":"Prerequisites","text":" -
Install TensorFlow by following the official documentation:
python -m pip install tf-nightly\n
-
Install IREE packages, either by building from source or from pip:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install \\\niree-compiler \\\niree-runtime \\\niree-tools-tf\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade \\\niree-compiler \\\niree-runtime \\\niree-tools-tf\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#importing-models","title":"Importing models","text":"IREE compilers transform a model into its final deployable format in several sequential steps. The first step for a TensorFlow model is to use either the iree-import-tf
command-line tool or IREE's Python APIs to import the model into a format (i.e., MLIR) compatible with the generic IREE compilers.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#from-savedmodel-on-tensorflow-hub","title":"From SavedModel on TensorFlow Hub","text":"IREE supports importing and using SavedModels from TensorFlow Hub.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#using-the-command-line-tool","title":"Using the command-line tool","text":"First download the SavedModel and load it to get the serving signature, which is used as the entry point for IREE compilation flow:
import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\nprint(list(loaded_model.signatures.keys()))\n
Note
If there are no serving signatures in the original SavedModel, you may add them by yourself by following \"Missing serving signature in SavedModel\".
Then you can import the model with iree-import-tf
. You can read the options supported via iree-import-tf -help
. Using MobileNet v2 as an example and assuming the serving signature is predict
:
iree-import-tf\n --tf-import-type=savedmodel_v1 \\\n--tf-savedmodel-exported-names=predict \\\n/path/to/savedmodel -o iree_input.mlir\n
Tip
iree-import-tf
is installed as /path/to/python/site-packages/iree/tools/tf/iree-import-tf
. You can find out the full path to the site-packages
directory via the python -m site
command.
Tip
-tf-import-type
needs to match the SavedModel version. You can try both v1 and v2 if you see one of them gives an empty dump.
Next, you can compile the model in iree_input.mlir
for one of IREE's supported targets by following one of the deployment configuration guides.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#samples","title":"Samples","text":"Colab notebooks Training an MNIST digits classifier Edge detection Pretrained ResNet50 inference TensorFlow Hub import End-to-end execution tests can be found in IREE's integrations/tensorflow/e2e/ directory.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#troubleshooting","title":"Troubleshooting","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#missing-serving-signature-in-savedmodel","title":"Missing serving signature in SavedModel","text":"Sometimes SavedModels are exported without explicit serving signatures. This happens by default for TensorFlow Hub SavedModels. However, serving signatures are required as entry points for IREE compilation flow. You can use Python to load and re-export the SavedModel to give it serving signatures. For example, for MobileNet v2, assuming we want the serving signature to be predict
and operating on a 224x224 RGB image:
import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\ncall = loaded_model.__call__.get_concrete_function(\n tf.TensorSpec([1, 224, 224, 3], tf.float32))\nsignatures = {'predict': call}\ntf.saved_model.save(loaded_model,\n '/path/to/resaved/model/', signatures=signatures)\n
The above will create a new SavedModel with a serving signature, predict
, and save it to /path/to/resaved/model/
.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/","title":"TensorFlow Lite integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#overview","title":"Overview","text":"IREE supports compiling and running TensorFlow Lite (TFLite) programs stored as TFLite FlatBuffers. These files can be imported into an IREE-compatible format then compiled to a series of backends.
graph LR\n accTitle: TFLite to runtime deployment workflow overview\n accDescr {\n Programs start as TensorFlow Lite FlatBuffers.\n Programs are imported into MLIR's TOSA dialect using iree-import-tflite.\n The IREE compiler uses the imported MLIR.\n Compiled programs are used by the runtime.\n }\n\n subgraph A[TFLite]\n A1[FlatBuffer]\n end\n\n subgraph B[MLIR]\n B1[TOSA]\n end\n\n C[IREE compiler]\n D[Runtime deployment]\n\n A -- iree-import-tflite --> B\n B --> C\n C --> D
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#prerequisites","title":"Prerequisites","text":" -
Install TensorFlow by following the official documentation:
python -m pip install tf-nightly\n
-
Install IREE packages, either by building from source or from pip:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install \\\niree-compiler \\\niree-runtime \\\niree-tools-tflite\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade \\\niree-compiler \\\niree-runtime \\\niree-tools-tflite\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#importing-and-compiling","title":"Importing and Compiling","text":"IREE's tooling is divided into two components: import and compilation.
- The import tool converts the TFLite FlatBuffer to an IREE compatible form, validating that only IREE compatible operations remain. Containing a combination of TOSA and IREE operations.
- The compilation stage generates the bytecode module for a list of targets, which can be executed by IREE.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-command-line-tools","title":"Using Command Line Tools","text":"These two stages can be completed entirely via the command line.
WORKDIR=\"/tmp/workdir\"\nTFLITE_URL=\"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\nTFLITE_PATH=${WORKDIR}/model.tflite\nIMPORT_PATH=${WORKDIR}/tosa.mlir\nMODULE_PATH=${WORKDIR}/module.vmfb\n\n# Fetch the sample model\nwget ${TFLITE_URL} -O ${TFLITE_PATH}\n\n# Import the sample model to an IREE compatible form\niree-import-tflite ${TFLITE_PATH} -o ${IMPORT_PATH}\n\n# Compile for the CPU backend\niree-compile \\\n--iree-input-type=tosa \\\n--iree-hal-target-backends=llvm-cpu \\\n${IMPORT_PATH} \\\n-o ${MODULE_PATH}\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-the-python-api","title":"Using the Python API","text":"The example below demonstrates downloading, compiling, and executing a TFLite model using the Python API. This includes some initial setup to declare global variables, download the sample module, and download the sample inputs.
Declaration of absolute paths for the sample repo and import all required libraries. The default setup uses the CPU backend as the only target. This can be reconfigured to select alternative targets.
import iree.compiler.tflite as iree_tflite_compile\nimport iree.runtime as iree_rt\nimport numpy\nimport os\nimport urllib.request\n\nfrom PIL import Image\n\nworkdir = \"/tmp/workdir\"\nos.makedirs(workdir, exist_ok=True)\n\ntfliteFile = \"/\".join([workdir, \"model.tflite\"])\njpgFile = \"/\".join([workdir, \"input.jpg\"])\ntfliteIR = \"/\".join([workdir, \"tflite.mlir\"])\ntosaIR = \"/\".join([workdir, \"tosa.mlir\"])\nbytecodeModule = \"/\".join([workdir, \"iree.vmfb\"])\n\nbackends = [\"llvm-cpu\"]\nconfig = \"local-task\"\n
The TFLite sample model and input are downloaded locally.
tfliteUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\njpgUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8_input.jpg\"\n\nurllib.request.urlretrieve(tfliteUrl, tfliteFile)\nurllib.request.urlretrieve(jpgUrl, jpgFile)\n
Once downloaded we can compile the model for the selected backends. Both the TFLite and TOSA representations of the model are saved for debugging purposes. This is optional and can be omitted.
iree_tflite_compile.compile_file(\n tfliteFile,\n input_type=\"tosa\",\n output_file=bytecodeModule,\n save_temp_tfl_input=tfliteIR,\n save_temp_iree_input=tosaIR,\n target_backends=backends,\n import_only=False)\n
After compilation is completed we configure the VmModule using the local-task configuration and compiled IREE module.
config = iree_rt.Config(\"local-task\")\ncontext = iree_rt.SystemContext(config=config)\nwith open(bytecodeModule, 'rb') as f:\n vm_module = iree_rt.VmModule.from_flatbuffer(config.vm_instance, f.read())\n context.add_vm_module(vm_module)\n
Finally, the IREE module is loaded and ready for execution. Here we load the sample image, manipulate it to the expected input size, and execute the module. By default TFLite models include a single function named 'main'. The final results are printed.
im = numpy.array(Image.open(jpgFile).resize((192, 192))).reshape((1, 192, 192, 3))\nargs = [im]\n\ninvoke = context.modules.module[\"main\"]\niree_results = invoke(*args)\nprint(iree_results)\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#samples","title":"Samples","text":" -
The tflitehub folder in the iree-samples repository contains test scripts to compile, run, and compare various TensorFlow Lite models sourced from TensorFlow Hub.
-
An example smoke test of the TensorFlow Lite C API is available here.
Colab notebooks Text classification with TFLite and IREE","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#troubleshooting","title":"Troubleshooting","text":"Failures during the import step usually indicate a failure to lower from TensorFlow Lite's operations to TOSA, the intermediate representation used by IREE. Many TensorFlow Lite operations are not fully supported, particularly those that use dynamic shapes. Please reach out on one of IREE's communication channels if you notice something missing.
","tags":["Python","TensorFlow"]},{"location":"reference/","title":"Reference pages","text":""},{"location":"reference/#api-bindings","title":"API bindings","text":"IREE offers API bindings for compiling and running programs from various languages.
- Index page
"},{"location":"reference/#mlir-dialects","title":"MLIR dialects","text":"Automatically generated documentation for the MLIR dialects defined in the IREE repository.
- Index page
"},{"location":"reference/#other-topics","title":"Other topics","text":" - Glossary
- Optimization options
- Extensions
"},{"location":"reference/extensions/","title":"Extension mechanisms","text":"Note
Much of this describes provisions for extension within IREE but until the core of the system has settled little work will be done to fully flesh-out and document them in detail. A large majority of things that would make someone want to extend IREE can instead be accomplished much more easily and performantly using native MLIR dialects that are then processed by the IREE compiler.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#guidelines","title":"Guidelines","text":"IREE has a compiler and runtime separation, a multi-layered architecture, and split between execution of \"host code\" that schedules compute-heavy work and SPMD \"device code\" that performs the bulk of compute operations. Each axis has a different set of extension mechanisms that can be used independently or combined.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#extension-philosophy","title":"Extension philosophy","text":"Organized below are some of the mechanisms IREE provides for extending the core compiler and runtime and when they should(n't) be used. The goal of these progressively lower-level extension mechanisms is to make it easier for users to fall into the pit of success:
Quote
\"a well-designed system makes it easy to do the right things and annoying (but not impossible) to do the wrong things.\" - Jeff Atwood
The amount of engineering complexity for initial bring-up and maintenance increases with each subsequently lower-level approach and it is best to start from the top and exit as fast as possible: this is a choose-your-own-adventure where you're trying to escape the dungeon with both the loot and your limbs. Avoid the temptation of immediately dropping down to making external C calls at runtime because that's how it's been done before as it's easier, more robust, and more performant to use the system as it is intended to be used.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-extend","title":"When to extend","text":"The primary goal when extending any framework should first be to avoid extending it at all. There is no mechanism that is free - whether in terms of engineering effort to develop and maintain over time, include in compiler deployments, or include in runtime deployments. As a system scales in deployment configurations the available mechanisms for extension increase but so too does the chaos introduced by extensions that do not also scale with that design. Users are the only ones who can determine the tradeoffs they are willing to accept: for example, the mechanism to extend device code with a custom runtime call to a C function does not work on GPUs and gets significantly more complicated on CPUs as sandboxes/enclaves are used - but if the user scenario is for local process CPU-only execution that may not matter.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-inputscompilerruntime","title":"Where to extend (inputs/compiler/runtime)","text":"Consider in normal software development when one would choose to write more code (possibly packaging it into a reusable library) vs. changing the programming language or compiler they are using to compile their code vs. changing the operating systems their code runs on. The further one gets from the problem they are trying to solve the more work, coordination, and maintenance is involved and though there are reasons to make changes across the stack they should be done only when a simpler solution would not suffice.
An author will retain more control over their logic the closer they sit to the inputs to the compiler. IREE provides several mechanisms that try to keep control with the author and robust to changes in IREE or MLIR internals and it is strongly encouraged that those looking to extend take those routes first. Contributions that help everyone are very welcome but do have a higher cost and it's often much easier to design and justify upstream changes with working examples in forks or at higher levels of the stack.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-hostdevice","title":"Where to extend (host/device)","text":"From a performance perspective the rule is to colocate code with the data it is acting on: tensor data, for example, should almost exclusively be manipulated by device code as tensors live on device. Attempting to use tensor data with host code will result in synchronization points and host/device transfers that can decimate performance. This can lead to seemingly paradoxical situations where swapping out compiler-generated code for a human-authored \"fast path\" can be slower than even the most naive compiler results. An important thing to keep in mind with compilers is that it is exceedingly difficult to produce code by hand that is consistently more performant across a broad range of deployments and the first temptation should always be to improve the compiler - extending it via other mechanisms when not required by the task is often just premature optimization.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#1-target-iree-input-dialects","title":"1. Target IREE input dialects","text":"TL;DR
Convert your custom ops into standard MLIR dialects.
+------------+ +--------+ +---------------+\n| Your input | -+-> | iree | -+-> | IREE compiler |\n+------------+ | +--------+ | +---------------+\n | +--------+ |\n +-> | linalg | -+\n | +--------+ |\n | .... |\n
The easiest, cleanest, and most robust path to extend IREE is to make use of what MLIR is designed for: composing dialects and converting between them. IREE supports several input dialects such as tosa
, mhlo
, linalg
, and the standard arith
, math
, tensor
, and scf
dialects. Any source IR that can be turned into that mix of dialects (directly or transitively) will work with the whole IREE pipeline for all deployment configurations and targets. If possible to express the computation in this form it will always be the best route to getting small deployments without the need to modify or include any additional code at runtime and run on all device types and execution modes.
This mechanism can also be layered with any of the subsequent lower-level ones: if some part of the operation runs on the host and some part on device then decomposing it such that it contains as many standard ops for flow control as possible and linear algebra/custom ops for the dense math will reduce the engineering effort required on both sides and lead to an easier to maintain solution even if lower-level extension is required.
A large majority of classic ML \"custom ops\" can be accomplished with this approach. When bringing up projects built on IREE it's best to concisely describe the operation in more elemental mathematical representations and then add optimizations where required knowing that things will still work even if those optimizations never happen.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros","title":"Pros","text":" - No IREE compiler or runtime code changes required.
- Can use standard IREE packaged releases and tools.
- No versioning issues at runtime.
- IREE's host/device partitioning can partition your code.
- Fusion and other compiler techniques (CSE/DCE/inlining/etc) work on your code.
- All target backends (CPU/GPU/accelerators/enclaves/etc) work.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons","title":"Cons","text":" - Input dialects cannot natively represent all possible programs (such as file IO and other syscalls).
- Performance-sensitive host code (b-trees and other in-memory databases) will run through the slower VM paths if not authored as dense compute.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use","title":"When to use","text":" - Targeting multiple MLIR toolchains of which IREE is just one (as little to no IREE-specific code is required).
- Operation represents host code in addition to device code.
- All code is known statically or symbolically at compile-time (instead of independently versioned libraries at runtime).
- Complex high-performance code not representable as linear algebra.
- External runtime interactions (file/network/user IO). Use custom modules.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation","title":"Implementation","text":"To make use of this approach one just needs to follow the standard MLIR dialect conversion behavior: add a dialect with ops, add a conversion pass, and run that pass before providing the resulting IR to the IREE compiler. See Creating a Dialect.
Think of this like authoring C++ sources with templates that you compile into your application: Clang (and LLVM beyond) don't know about your library details and instead just process it as it would any other code. You can take the same source and pass it to GCC and it'll be robust to underlying changes in the system.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#2-extend-host-code-with-custom-modules","title":"2. Extend host code with custom modules","text":"TL;DR
Import MLIR functions in the compiler and custom modules at runtime.
// Main user module compiled by IREE:\nmodule @model {\n // Declare a synchronous external function:\n func.func private @my_custom_module.sync_func(%input: tensor<?xf32>) -> i32\n // Declare an asynchronous external function:\n func.func private @my_custom_module.async_func(%input: tensor<?xf32>) -> tensor<?xf32> attributes {\n iree.abi.model = \"coarse-fences\",\n nosideeffects\n }\n func.func @predict() {\n ...\n // Call a synchronous/blocking external function:\n %sync_result = call @my_custom_module.sync_func(%sync_input) : (tensor<?xf32>) -> i32\n ...\n ...\n // Call an asynchronous/non-blocking external function:\n %async_result = call @my_custom_module.async_func(%async_input) : (tensor<?xf32>) -> tensor<?xf32>\n ...\n }\n}\n
IREE provides dynamic linking at runtime via its VM interfaces. For code that runs on the host and requires syscalls or calling out to existing libraries - such as file IO, text processing, and JPEG decoding - this is an easy way to interop without paying attention to the more complex details of device code. An IREE module compiled using custom modules is portable and dynamically deployable so long as the custom module is registered at runtime.
This approach conceptually matches what normal native binaries do in an OS: imports are declared and at runtime they are resolved based on the available exports of modules in the system. Just as with normal systems engineering design of the API between modules is up to the user and depending on rigor can have several pitfalls but these problems and their solutions are not IREE specific and anyone who has designed a shared library interface can apply the same rules here in IREE around versioning, performance, etc. One does not add 2 integers via a syscall and the same holds here: custom modules and the functions within should perform a large amount of work to hide overheads involved in the cross-module calls and users must be aware that the compiler cannot optimize across the call boundaries.
See the synchronous tensor I/O and asynchronous tensor I/O samples.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_1","title":"Pros","text":" - No IREE compiler code changes required.
- Produced artifacts are portable across IREE deployment configurations.
- Full system access is allowed - the VM just calls external functions.
- Runtime modules can be implemented (via shims) in other languages/runtimes.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_1","title":"Cons","text":" - Custom modules must be registered at runtime by the user.
- The VM custom module ABI goo must be authored by the user (such as with JNI or pybind to move between java/python and C).
- All custom module code must be compiled and deployed regardless of how much any modules use. The granularity of modules and their versioning is up to the user.
- Custom module code cannot be optimized by the IREE compiler to avoid host/device readbacks and unnecessary data type conversion.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_1","title":"When to use","text":" - Interactions with large libraries or system calls.
- Performance-sensitive host code that cannot easily be represented as device code (like UTF-8 string transformation using libicu).
- Extensively using tensor resources.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_1","title":"Implementation","text":"The runtime portion requires that the code be exported to the VM system by way of an iree_vm_module_t
interface. A low-level native interface exists with minimal overhead and is used for example by the IREE HAL itself. There is also a C++ wrapper that is significantly easier to work with however it needs some performance improvements.
Full end-to-end examples can be found under samples/custom_modules/
:
- The basic sample shows how to add VM modules with custom types and take advantage of ABI features like fallback functions and optional imports.
- The synchronous tensor I/O sample shows a call taking and returning a tensor and performing blocking work.
- The asynchronous tensor I/O sample shows the same thing but with fences for asynchronous scheduling.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#3-extend-target-specific-device-conversion-patterns","title":"3. Extend target-specific device conversion patterns","text":"TL;DR
Add patterns to iree/Compiler/Codegen/
to emit target code.
The easiest and most robust path for specializations of device code is to emit such code mixed with the IREE compiler generated code at the highest possible level of abstraction within the target pipeline. For example, if the code can be represented with the vector
dialect then inserting conversion patterns between linalg
and vector
enables the emitted code to be specialized further based on user configuration and optimized with the full set of available passes that run in the pipeline. For each level lower one goes the more flexibility they gain such as being able to emit inline assembly blocks that do anything while trading off generality and multi-targeting applicability.
How much the tradeoff matters is based on the behavior of the extension. If a pattern changing a transcendental function to an approximation can operate at the vector level then all IREE deployment targets can benefit from the pattern and as new targets are made available they will automatically receive the benefits. In contrast, a pattern at the vector level that turns generic vector operations into architecture-specific LLVM intrinsics by its nature only pertains to a single target family and can be done at a lower level. As a rule of thumb if a particular pattern is going to need ~N implementations for ~N targets that are all mostly the same it's better to try to move that higher in the stack.
At this point the complexity of extending things is still fairly constrained: a C++ pass or pattern is verified with normal lit tests and can be upstreamed easily either into MLIR or IREE (a large number of IREE patterns are upstreamed, benefiting all users of MLIR). Cross-compilation and versioning are not a factor and the IREE artifacts can be considered durable at a coarse level (outside of major target architectural changes).
Note that depending on the target there are various mechanisms for representing code in MLIR, up to including inline assembly snippets in IR via llvm.inline_asm
.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_2","title":"Pros","text":" - Not limited to what is possible to represent in any particular MLIR dialect.
- Rich target configuration available; multiple passes can contribute info.
- Produced executable binaries are hermetic and no runtime changes are required.
- Specialization can happen in MLIR dialects like
linalg
or vector
as well as target-specific representations like SPIR-V and LLVM IR. - The compiler can perform deep optimizations across both the generated code and the provided code (hoisting/loop invariant code motion/cse/etc).
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_2","title":"Cons","text":" - Requires implementing the patterns as code in the IREE compiler or via TBD interfaces.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_2","title":"When to use","text":" - Code that must be emitted during target lowering - such as something optimizing for a particular CPU architecture.
- Hot code mixed with generated code at a fine granularity (within the innermost loop).
- External existing hand-authored libraries. Either statically or dynamically link instead.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_2","title":"Implementation","text":"There are several ways to author patterns and passes in MLIR. As examples:
- A majority of patterns are authored in C++ using PatternRewriter.
- PDL is an MLIR-based way to express rewrite operations with strong typing, compile-time verification, and easily-readable and less-verbose IR.
linalg
uses a python-based DSL for defining some of its extended ops.
There are many examples within both MLIR and IREE, one specifically being the polynomial approximation expansion patterns.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#4-include-external-target-specific-device-code","title":"4. Include external target-specific device code","text":"TL;DR
Statically link external object files into IREE executables.
For large bodies of existing device code or library calls that are available for static linkage the work involved to reimplement them at higher levels of the stack can be cost prohibitive even if it leads to better results. In these cases just as with a normal toolchain one would just want to declare an external function, call it, and add the object file to the linker command line. In IREE the same can be performed by way of taking compatible bitcode or native object files and linking them in with the generated code. An MLIR pattern would declare and emit the call and the target-specific IREE linker would pull in the objects.
As the linking behavior varies per target (for example, some targets like SPIR-V don't have traditional linkers) how this is performed is up to the IREE target backends. The complexity involved in producing the object files to link will also vary per-backend and the complexity of the deployment: cross-compiling for multiple architectures or compilation modes (ASAN, etc) will require unique copies of the object files matching that precise configuration.
At this point generality is largely out as is the ability to cleanly upstream such files. It should be apparent how a few dozen lines of C++ or PDL that avoids the need for any of this complexity is more appealing. In extremely specific cases of a single platform/architecture/version for a single program deployed via a specific artifact composition it's not so bad but IREE is designed such that extreme specificity is an optional mode of the more general solution. This does not mean this mechanism is not useful in some situations and only that it should be a last-resort when one of the easier to manage solutions is not viable - not a shortcut to avoid writing some C++ patterns.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_3","title":"Pros","text":" - Works with hand-authored code in compatible object files from any toolchain.
- No IREE runtime changes required.
- All deployment modes still work, including multi-targeting.
- No versioning concerns as custom code is included in artifacts.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_3","title":"Cons","text":" - Users must provide per-target precompiled object files on disk.
- IREE compiler changes are still needed for generating the external calls.
- Though LTO may be able to optimize across the calls it is not guaranteed.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_3","title":"When to use","text":" - Existing math libraries or architecture-specific functions that cannot be ported into a more MLIR-friendly form.
- Mixing in hand-authored code written in C/rust/etc with generated code from MLIR.
- External code can be represented as either
linalg
, vector
, or LLVM IR. Use target-specific conversion patterns instead. - External code size is large and unlikely to benefit from link-time optimizations (such as something like libjpeg). Dynamically link instead.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_3","title":"Implementation","text":"As the linking behavior varies per target backend there is no general solution at this level: if targeting the CPU then the system native linker or lld need to be provided the object files, while SPIR-V will need to merge the SPIR-V binaries directly, and Metal shader libraries will need to be constructed with the Apple-specific metallib
tooling. Producing these files and performing the linking is outside the scope of IREE.
If the files can be acquired then compiler changes will be required to emit calls to them and invoke the linker with the files.
On the CPU an alternative is to use the static library output mode where IREE produces an object file and then the user invokes the linker themselves; this still requires the compiler changes to emit the calls but avoids needing to teach the compiler how to link the files.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#5-dynamically-link-target-specific-device-code-cpu-only","title":"5. Dynamically link target-specific device code (CPU only)","text":"TL;DR
Dynamically link external C functions at runtime from device code.
It is pitch black. You are likely to be eaten by a grue.
This is the lowest-level integration in the system and is designed to act as an escape hatch and - as with any emergency escape hatch - it's not designed for ergonomics. Users should try first to come in through the door and attempting to use this mechanism should trigger alarms about the approach being attempted.
IREE's execution model for device code and native machine binary deployment mechanisms are designed with several constraints in order to make all of the above approaches possible and performant. Calling arbitrary C functions from deep within the system can introduce subtle (and not-so-subtle) bugs that are extremely difficult to track down and versioning between the compiler emitting the calls and the runtime providing the implementations can cause skew unless held carefully. Consider the methods added here like syscalls in that they must be extremely focused and if they are ever likely to change (including being removed) then care will be needed just as with versioning or redirecting a syscall. Designing good stable interfaces is hard and a classic pit of failure.
Some things to note:
- Device code executes in a tiled fashion and single dispatches may invoke the same function many times from many threads concurrently to perform the larger work.
- Tiles may execute in any order and on any thread; performing fine-grained locking within the tile can lead to deadlocks.
- Device code is stateless in order to allow for access restrictions and caching across multiple loaded models - any library state required must be externally managed via process globals.
- Device code may be running out-of-process (sandbox/enclave) and the library functions must be available where the dispatches run and not where they are launched (such as being linked into the sandbox binary, if separate from the main process binary).
- The stack must be used to pass arguments/results to external calls via a single pointer and there is no libffi-like functionality for magically calling arbitrary C functions. Users must provide the shims they need.
- Thread-local storage is unavailable in the called code (it may be usable, but it is not guaranteed it'll work on all platforms and leaks are likely).
- No heap allocator is provided and the use of libc malloc is unsupported.
Most of the constraints here come from the SPMD parallelism model, platform-agnostic deployment format, and overall data-oriented design of IREE. Code operating in this fashion has a certain shape and that is usually not the same as big legacy single-threaded CPU-focused BLAS libraries that perform their own caching, internal thread and state management, and other shenanigans. IREE is not designed to wrap such things and if any of these notes are issues it is more an indicator that the approach needs adjustment than anything else. Trying to bypass or workaround the constraints is possible - after all IREE is an open source project and any user is welcome to fork it - but unsupported by the core IREE team.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_4","title":"Pros","text":" - Function resolution at runtime is orthogonal to compiler target specification.
- Machine code can be shared between the application and IREE artifacts.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_4","title":"Cons","text":" - IREE compiler and runtime must both be modified.
- Deeper integration with the IREE codegen compiler infrastructure required.
- ABI versioning complexity between compiler and runtime.
- Runtimes must ship the imports for the lifetime of any artifact compiled to use them.
- Humans are bad at predicting the future.
- Using the same artifact in different binaries at runtime requires changes to each binary - including those that may not be owned by the person producing the artifact.
- Weak imports and conditional usage can help but still leads to bloat.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_4","title":"When to use","text":" - Calling into opaque closed-source BLAS-like microkernel libraries.
- Any other cases covered above can be used, especially microkernels that can be represented in MLIR or as statically linked libraries.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_4","title":"Implementation","text":"The compiler is changed to produce calls to imports via a dynamic import table provided to each dispatch function. The import table is declared in the executable library for use at runtime. Runtime applications register an import provider to resolve named symbols in the import table to C functions that marshal arguments and results.
The compiler-side needs some additional work but an example is included here: Issue 7504. The runtime-side is complete and resolution is performed by a user-supplied iree_hal_executable_import_provider_t
.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/","title":"Glossary","text":"IREE exists in an ecosystem of projects and acts as a bridge between machine learning frameworks and a variety of hardware platforms. This glossary outlines some of those projects and technologies.
Something missing?
Don't see a project or technology here that you think should be? We welcome contributions on our GitHub page!
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#jax","title":"JAX","text":"JAX is a Python framework supporting high-performance machine learning research by bridging automatic differentiation and ML compilers like XLA and IREE.
See the JAX Integration guide for details on how to use JAX programs with IREE.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#mlir","title":"MLIR","text":"Multi-Level Intermediate Representation (MLIR) is the compiler framework that IREE is built around. Beyond the tooling this includes a set of common dialects and transformations that IREE utilizes for its code generation system.
For general discussion on MLIR see the project's discourse forum.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#linalg","title":"Linalg","text":"Linalg is an MLIR dialect that defines Linear Algebra operations in a generalized fashion by modeling iteration spaces together with compute payloads. Linalg includes a set of commonly used operations as well as generic interfaces.
IREE uses the Linalg dialect during its code generation pipeline to define tensor operations then generate loop structures for its various backend targets.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#openxla","title":"OpenXLA","text":"OpenXLA is a community-driven, open source ML compiler ecosystem.
IREE is one project under the OpenXLA GitHub Organization, and it interfaces with many of the other projects, such as StableHLO.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#pytorch","title":"PyTorch","text":"PyTorch is an optimized tensor library for deep learning.
PyTorch uses the Torch-MLIR project to interface with projects like IREE. See the PyTorch Integration guide for details on how to use PyTorch programs with IREE.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#spir-v","title":"SPIR-V","text":"SPIR-V is a shader and kernel intermediate language for expressing parallel computation typically used for GPUs. It serves as a hardware agnostic assembly format for distributing complex, computationally intensive programs.
IREE uses the SPIR-V MLIR Dialect in its code generation pipeline for Vulkan and other compute APIs.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#stablehlo","title":"StableHLO","text":"StableHLO is a set of versioned high-level operations (HLOs) for ML models with backward and forward compatibility guarantees. StableHLO aims to improve interoperability between frameworks (such as TensorFlow, JAX, and PyTorch) and ML compilers.
StableHLO has both a specification and an MLIR dialect.
IREE uses the StableHLO MLIR Dialect as one of its input formats.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tosa","title":"TOSA","text":"Tensor Operator Set Architecture (TOSA) provides a set of tensor operations commonly employed by Deep Neural Networks. TOSA defines accuracy and compatibility constraints so frameworks that use it can trust that applications will produce similar results on a variety of hardware targets.
TOSA has both a specification and an MLIR dialect.
IREE uses the TOSA MLIR dialect as one of its input formats.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tflite","title":"TFLite","text":"TensorFlow Lite (TFLite) is a library for deploying models on mobile and other edge devices.
IREE supports running TFLite programs that have been imported into MLIR using the TOSA dialect. See the TFLite Integration guide for details on how to use TFLite programs with IREE.
IREE also has bindings for the TFLite C API, see the runtime/bindings/tflite/
directory for details.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/optimization-options/","title":"Optimization options","text":"This page documents various supported flags for optimizing IREE programs. Each is presented with its English name, flag to enable/disable, and default state.
These flags can be passed to the:
iree-compile
command line tool extra_args=[\"--flag\"]
argument to iree.compiler.tools
Python wrappers - In-process Python compiler API
iree.compiler.transforms.iree-compile.CompilerOptions(\"--flag\", \"--flag2\")
constructor ireeCompilerOptionsSetFlags()
compiler C API function
"},{"location":"reference/optimization-options/#high-level-program-optimizations","title":"High level program optimizations","text":""},{"location":"reference/optimization-options/#constant-evaluation-iree-opt-const-eval-on","title":"Constant evaluation (--iree-opt-const-eval
(on))","text":"Performs compile-time evaluation of any global initializers which produce the initial values for global constants, storing the global directly in the program as constant data. This extracts such constant program fragments and recursively compiles them, using the runtime to evaluate the results.
Note that this only has any effect on computations in module initializer functions, not free-standing operations in the program which may produce constant-derived results. See --iree-opt-const-expr-hoisting
for options to optimize these.
"},{"location":"reference/optimization-options/#constant-expression-hoisting-iree-opt-const-expr-hoisting-off","title":"Constant expression hoisting (--iree-opt-const-expr-hoisting
(off))","text":"Identifies all trees of constant expressions in the program and uses a heuristic to determine which would be profitable to hoist into global initializers for evaluation at module load. Together with --iree-opt-const-eval
, this will convert eligible trees of expressions to purely static data embedded in the module.
The heuristic is currently relatively primitive, using static information to disable hoisting of leaf operations which are metadata only (i.e. broadcasts, etc) or are expected to fold away as part of operator fusion. Notably, the current heuristic is likely to pessimize module size in the case of complicated programs with trees of constant, large tensors.
"},{"location":"reference/optimization-options/#numeric-precision-reduction-iree-opt-numeric-precision-reduction-off","title":"Numeric precision reduction (--iree-opt-numeric-precision-reduction
(off))","text":"Analyzes program constant data and program flow to identify math operations which can be safely evaluated with reduced precision (currently with a minimum of 8bit integers but being extended to infer any bit depth) and inserts appropriate casts. In conjunction with Constant Expression Hoisting, Constant Evaluation and other automatic optimizations, this can produce programs where large amounts (up to the whole) have had their numeric operations and constant data rewritten to lower precision types.
This feature is actively evolving and will be the subject of dedicated documentation when ready.
"},{"location":"reference/optimization-options/#strip-debug-assertions-iree-opt-strip-assertions-off","title":"Strip Debug Assertions (--iree-opt-strip-assertions
(off))","text":"Strips all std.assert
ops in the input program after useful information for optimization analysis has been extracted. Assertions provide useful user-visible error messages but can prevent critical optimizations. Assertions are not, however, a substitute for control flow, and frontends that want to check errors in optimized release builds should do so via actual code - similar to when one would if (foo) return false;
vs. assert(foo);
in a normal program.
"},{"location":"reference/bindings/","title":"API bindings","text":"API bindings allow for programmatic use of IREE's compiler and runtime components. The core IREE project is written in C1, allowing for API bindings to be written in a variety of other languages.
Something missing?
Want to use another language? Looking for something specific out of one of those already listed?
We welcome discussions on our communication channels and contributions on our GitHub page!
"},{"location":"reference/bindings/#official-api-bindings","title":"Official API bindings","text":"Members of the core project team and OpenXLA partners maintain these official bindings:
Language Compiler API? Runtime API? Published packages? C/C++ Supported Supported Unsupported Python Supported Supported Supported JavaScript Experimental Experimental Unsupported"},{"location":"reference/bindings/#cc","title":"C/C++","text":"See the C API reference page.
"},{"location":"reference/bindings/#python","title":"Python","text":"See the Python reference page.
"},{"location":"reference/bindings/#javascript","title":"JavaScript","text":" - JavaScript bindings for WebAssembly and WebGPU are under development in IREE's
experimental/web/
directory.
"},{"location":"reference/bindings/#unofficial-api-bindings","title":"Unofficial API bindings","text":"Members of our developer community have authored bindings using other languages:
Language Compiler API? Runtime API? Published packages? Julia Experimental Experimental Unsupported Rust Unsupported Experimental Experimental"},{"location":"reference/bindings/#julia","title":"Julia","text":" - Coil.jl is an experimental package to lower and execute Julia tensor operations to IREE.
"},{"location":"reference/bindings/#rust","title":"Rust","text":" - iree-rs is a crate containing rustic bindings for the IREE runtime.
-
with some C++ tools and utilities\u00a0\u21a9
"},{"location":"reference/bindings/c-api/","title":"C API bindings","text":""},{"location":"reference/bindings/c-api/#overview","title":"Overview","text":"The IREE compiler and IREE runtime both have their own C/C++ APIs. This page introduces the available APIs and describes how to use them from your applications.
Note
There are multiple ways to distribute and depend on C/C++ projects, each with varying levels of portability, flexibility, and toolchain compatibility. IREE aims to support common configurations and platforms.
"},{"location":"reference/bindings/c-api/#compiler-api","title":"Compiler API","text":"The IREE compiler is structured as a monolithic shared object with a dynamic plugin system allowing for extensions. The shared object exports symbols for versioned API functions.
graph TD\n accTitle: IREE compiler linkage model diagram\n accDescr {\n The libIREECompiler.so or IREECompiler.dll shared object contains pipelines,\n target backends, and general passes as private implementation details.\n Compiler plugins interface with the compiler shared object to extend it with\n custom targets, dialects, etc.\n Applications interface with the compiler shared object through the compiler\n C API's exported symbols.\n }\n\n subgraph compiler[libIREECompiler.so / IREECompiler.dll]\n pipelines(\"Pipelines\n\n \u2022 Flow\n \u2022 Stream\n \u2022 etc.\")\n\n targets(\"Target backends\n\n \u2022 llvm-cpu\n \u2022 vulkan-spirv\n \u2022 etc.\")\n\n passes(\"General passes\n\n \u2022 Const eval\n \u2022 DCE\n \u2022 etc.\")\n end\n\n plugins(\"Compiler plugins\n\n \u2022 Custom targets\n \u2022 Custom dialects\n \u2022 etc.\")\n\n application(Your application)\n\n compiler <-- \"Plugin API<br>(static or dynamic linking)\" --> plugins\n compiler -. \"Compiler C API<br>(exported symbols)\" .-> application
API definitions can be found in the following locations:
Source location Overview iree/compiler/embedding_api.h
Top-level IREE compiler embedding API iree/compiler/PluginAPI/
directory IREE compiler plugin API mlir/include/mlir-c/
directory MLIR C API headers"},{"location":"reference/bindings/c-api/#concepts","title":"Concepts","text":"The compiler API is centered around running pipelines to translate inputs to artifacts. These are modeled via sessions, invocations, sources, and outputs.
stateDiagram-v2\n accTitle: IREE compiler session and invocation state diagram\n accDescr {\n Input files are opened (or buffers are wrapped) as sources in a session.\n Sources are parsed into invocations, which run pipelines.\n Output files are written (or buffers are mapped) for compilation artifacts.\n Sessions can contain multiple sources and run multiple invocations.\n }\n\n direction LR\n InputFile --> Source1 : open file\n InputBuffer --> Source2 : wrap buffer\n\n state Session {\n Source1 --> Invocation1\n Source2 --> Invocation2\n Invocation1 --> Invocation1 : run pipeline\n Invocation2 --> Invocation2 : run pipeline\n }\n\n Invocation1 --> Output1File : write file\n Invocation1 --> Output1Buffer : map memory\n Invocation2 --> Output2Buffer : map memory
"},{"location":"reference/bindings/c-api/#sessions","title":"Sessions","text":"A session (iree_compiler_session_t
) is a scope where one or more invocations can run.
- Internally, sessions consist of an
MLIRContext
and a private set of options. - Sessions may activate available plugins based on their options.
"},{"location":"reference/bindings/c-api/#invocations","title":"Invocations","text":"An invocation (iree_compiler_invocation_t
) is a discrete run of the compiler.
- Invocations run pipelines, consisting of passes, to translate from sources to outputs.
"},{"location":"reference/bindings/c-api/#sources","title":"Sources","text":"A source (iree_compiler_source_t
) represents an input program, including operations and data.
- Sources may refer to files or buffers in memory.
"},{"location":"reference/bindings/c-api/#outputs","title":"Outputs","text":"An output (iree_compiler_output_t
) represents a compilation artifact.
- Outputs can be standalone files or more advanced streams.
"},{"location":"reference/bindings/c-api/#plugins","title":"Plugins","text":"A plugin extends the compiler with some combination of target backends, options, passes, or pipelines.
"},{"location":"reference/bindings/c-api/#usage","title":"Usage","text":""},{"location":"reference/bindings/c-api/#compiler-session-api","title":"Compiler session API","text":"This snippet shows the general layout of the API. For working examples, see the samples below.
To build a custom tool using the compiler API:
CMakeLists.txtset(_IREE_COMPILER_API \"${_IREE_COMPILER_ROOT}/bindings/c/iree/compiler\")\ntarget_include_directories(${_NAME} SYSTEM PRIVATE ${_IREE_COMPILER_API})\ntarget_link_libraries(${_NAME} iree_compiler_bindings_c_loader)\n
iree_compiler_demo.c#include <iree/compiler/embedding_api.h>\n#include <iree/compiler/loader.h>\n\nint main(int argc, char** argv) {\n// Load the compiler library then initialize it.\nireeCompilerLoadLibrary(\"libIREECompiler.so\");\nireeCompilerGlobalInitialize();\n\n// Create a session to track compiler state and set flags.\niree_compiler_session_t *session = ireeCompilerSessionCreate();\nireeCompilerSessionSetFlags(session, argc, argv);\n\n// Open a file as an input source to the compiler.\niree_compiler_source_t *source = NULL;\nireeCompilerSourceOpenFile(session, \"input.mlir\", &source);\n\n// Use an invocation to compile from the input source to one or more outputs.\niree_compiler_invocation_t *inv = ireeCompilerInvocationCreate(session);\nireeCompilerInvocationPipeline(inv, IREE_COMPILER_PIPELINE_STD);\n\n// Output the compiled artifact to a file.\niree_compiler_output_t *output = NULL;\nireeCompilerOutputOpenFile(\"output.vmfb\", &output);\nireeCompilerInvocationOutputVMBytecode(inv, output);\n\n// Cleanup state.\nireeCompilerInvocationDestroy(inv);\nireeCompilerOutputDestroy(output);\nireeCompilerSourceDestroy(source);\nireeCompilerSessionDestroy(session);\nireeCompilerGlobalShutdown();\n}\n
"},{"location":"reference/bindings/c-api/#compiler-plugins","title":"Compiler plugins","text":"This snippet comes from the example compiler plugin. For other examples, see the samples below.
To add a compiler plugin that extends the compiler with custom options:
samples/compiler_plugins/example/CMakeLists.txtiree_cc_library(\nNAME\nregistration\nSRCS\n\"src/PluginRegistration.cpp\"\nDEPS\n::defs\nMLIRIR\niree::compiler::PluginAPI\nPUBLIC\n)\n\niree_compiler_register_plugin(\nPLUGIN_ID\nexample\nTARGET\n::registration\n)\n
samples/compiler_plugins/example/src/PluginRegistration.cpp#include \"iree/compiler/PluginAPI/Client.h\"\n#include \"mlir/IR/Diagnostics.h\"\n#include \"mlir/IR/Location.h\"\n#include \"mlir/IR/MLIRContext.h\"\n\nusing namespace mlir;\nusing namespace mlir::iree_compiler;\n\nnamespace {\n\nstruct MyOptions {\nbool flag = false;\n\nvoid bindOptions(OptionsBinder &binder) {\nstatic llvm::cl::OptionCategory category(\"IREE Example Plugin\");\nbinder.opt<bool>(\"iree-example-flag\", flag,\nllvm::cl::desc(\"Dummy flag for the example plugin\"),\nllvm::cl::cat(category));\n}\n};\n\nstruct MySession : public PluginSession<MySession, MyOptions> {\nLogicalResult onActivate() override {\nmlir::emitRemark(mlir::UnknownLoc::get(context))\n<< \"This remark is from the example plugin activation (flag=\"\n<< options.flag << \")\";\nreturn success();\n}\n};\n\n} // namespace\n\nIREE_DEFINE_COMPILER_OPTION_FLAGS(MyOptions);\n\nextern \"C\" bool iree_register_compiler_plugin_example(\nmlir::iree_compiler::PluginRegistrar *registrar) {\nregistrar->registerPlugin<MySession>(\"example\");\nreturn true;\n}\n
"},{"location":"reference/bindings/c-api/#samples","title":"Samples","text":"Project Source Description iree-org/iree-template-compiler-cmake hello_compiler.c
Compiler application template openxla/iree integrations/pjrt/.../iree_compiler.cc
JIT for TensorFlow + JAX to IREE openxla/iree compiler/plugins
In-tree supported compiler plugins openxla/iree samples/compiler_plugins/
In-tree sample compiler plugins nod-ai/iree-amd-aie plugins/.../iree-amd-aie
Early-phase plugins for interfacing with AMD AIE accelerators"},{"location":"reference/bindings/c-api/#runtime-api","title":"Runtime API","text":"The IREE runtime is structured as a modular set of library components. Each component is designed to be linked into applications directly and compiled with LTO style optimizations.
The low level library components can be used directly or through a higher level API.
High level APILow level API The high level 'runtime' API sits on top of the low level components. It is relatively terse but does not expose the full flexibility of the underlying systems.
graph TD\n accTitle: IREE runtime high level API diagram\n accDescr {\n The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n their own types and API methods.\n A high level \"runtime API\" sits on top of these component APIs.\n Applications can interface indirectly with the IREE runtime via this\n high level runtime API.\n }\n\n subgraph iree_runtime[IREE Runtime]\n subgraph base\n base_types(\"Types\n\n \u2022 allocator\n \u2022 status\n \u2022 etc.\")\n end\n\n subgraph hal[HAL]\n hal_types(\"Types\n\n \u2022 buffer\n \u2022 device\n \u2022 etc.\")\n\n hal_drivers(\"Drivers\n\n \u2022 local-*\n \u2022 vulkan\n \u2022 etc.\")\n end\n\n subgraph vm[VM]\n vm_types(\"Types\n\n \u2022 context\n \u2022 invocation\n \u2022 etc.\")\n end\n\n runtime_api(\"Runtime API\n\n \u2022 instance\n \u2022 session\n \u2022 call\")\n\n base_types & hal_types & hal_drivers & vm_types --> runtime_api\n end\n\n application(Your application)\n\n runtime_api --> application
Each runtime component has its own low level API. The low level APIs are typically verbose as they expose the full flexibility of each underlying system.
graph TD\n accTitle: IREE runtime low level API diagram\n accDescr {\n The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n their own types and API methods.\n Applications can interface directly with the IREE runtime via the low\n level component APIs.\n }\n\n subgraph iree_runtime[IREE Runtime]\n subgraph base\n base_types(\"Types\n\n \u2022 allocator\n \u2022 status\n \u2022 etc.\")\n end\n subgraph hal[HAL]\n hal_types(\"Types\n\n \u2022 buffer\n \u2022 device\n \u2022 etc.\")\n\n hal_drivers(\"Drivers\n\n \u2022 local-*\n \u2022 vulkan\n \u2022 etc.\")\n end\n subgraph vm[VM]\n vm_types(\"Types\n\n \u2022 context\n \u2022 invocation\n \u2022 etc.\")\n end\n end\n\n application(Your application)\n\n base_types & hal_types & hal_drivers & vm_types --> application
Runtime API header files are organized by component:
Component header file Overview iree/runtime/api.h
High level runtime API iree/base/api.h
Core API, type definitions, ownership policies, utilities iree/vm/api.h
VM APIs: loading modules, I/O, calling functions iree/hal/api.h
HAL APIs: device management, synchronization, accessing hardware features"},{"location":"reference/bindings/c-api/#high-level-concepts","title":"High level concepts","text":"The high level API uses instances, sessions, and calls to run programs with a small API surface.
stateDiagram-v2\n accTitle: IREE runtime high level API state diagram\n accDescr {\n Instances track sessions and state: options, drivers, devices.\n Sessions track calls and state: a device and bytecode/VM modules.\n Calls track input and output lists.\n }\n\n state iree_runtime_instance_t {\n instance_state: state<br>- options<br>- drivers<br>- devices\n\n state iree_runtime_session_t {\n session_state: state<br>- device<br>- VM / bytecode modules\n state iree_runtime_call_t {\n inputs\n outputs\n }\n }\n }
"},{"location":"reference/bindings/c-api/#instance","title":"Instance","text":"An instance (iree_runtime_instance_t
) isolates runtime usage and manages device resources.
- Instances may service multiple sessions to avoid extra device interaction and reuse caches/pools.
- Separate instances are isolated/sandboxed from one another.
"},{"location":"reference/bindings/c-api/#session","title":"Session","text":"A session (iree_runtime_session_t
) contains a set of loaded modules and their state.
- Sessions that share an instance may share resources directly.
- Sessions that do not share an instance can transfer resources using import and export APIs.
"},{"location":"reference/bindings/c-api/#call","title":"Call","text":"A call (iree_runtime_call_t
) is a stateful VM function call builder.
- Calls can be reused to avoid having to construct input lists for each invocation.
"},{"location":"reference/bindings/c-api/#low-level-concepts","title":"Low level concepts","text":""},{"location":"reference/bindings/c-api/#base","title":"Base","text":"Under construction, more coming soon
"},{"location":"reference/bindings/c-api/#vm","title":"VM","text":"IREE uses its own Virtual Machine (VM) at runtime to interpret program instructions on the host system.
Tip - EmitC alternate lowering path VM instructions may be further lowered to C source code for static or resource constrained deployment.
See the --output-format=vm-c
compiler option and the samples in samples/emitc_modules/
for more information.
The VM supports generic operations like loads, stores, arithmetic, function calls, and control flow. The VM builds streams of more complex program logic and dense math into HAL command buffers that are dispatched to hardware backends.
- VM instances can serve multiple isolated execution contexts.
- VM contexts are effectively sandboxes for loading modules and running programs.
-
VM modules provide all functionality to execution contexts, including access to hardware accelerators through the HAL. Compiled user programs are also modules.
stateDiagram-v2\n accTitle: Sample VM Modules\n accDescr {\n Bytecode modules contain program state, program functions, and debug\n information.\n HAL modules contain devices, executables, HAL functions, and HAL types.\n Custom modules may contain external functions and custom types.\n }\n\n state \"Bytecode module\" as bytecode {\n bytecode_contents: Module state<br>Program functions<br>Debug information\n }\n\n state \"HAL module\" as HAL {\n hal_contents: Devices<br>Executables<br>HAL functions<br>HAL types\n }\n\n state \"Custom module\" as custom {\n custom_contents: External functions<br>Custom types\n }
"},{"location":"reference/bindings/c-api/#hal","title":"HAL","text":"IREE uses a Hardware Abstraction Layer (HAL) to model and interact with hardware devices like CPUs, GPUs and other accelerators.
- HAL drivers are used to enumerate and create HAL devices.
- HAL devices interface with hardware, such as by allocating device memory, preparing executables, recording and dispatching command buffers, and synchronizing with the host.
- HAL buffers represent data storage and buffer views represent views into that storage with associated shapes and types (similar to \"tensors\").
"},{"location":"reference/bindings/c-api/#usage_1","title":"Usage","text":"This snippet shows the general layout of the API. For working examples, see the samples below.
CMakeLists.txttarget_include_directories(${_NAME} SYSTEM PRIVATE ${_IREE_RUNTIME_ROOT})\ntarget_link_libraries(${_NAME} iree_runtime_runtime)\n
iree_runtime_demo.c#include <iree/runtime/api.h>\n\nint main(int argc, char** argv) {\n// Setup the shared runtime instance.\niree_runtime_instance_options_t instance_options;\niree_runtime_instance_options_initialize(&instance_options);\niree_runtime_instance_options_use_all_available_drivers(&instance_options);\niree_runtime_instance_t* instance = NULL;\niree_runtime_instance_create(\n&instance_options, iree_allocator_system(), &instance);\n\n// Create the HAL device used to run the workloads.\niree_hal_device_t* device = NULL;\niree_runtime_instance_try_create_default_device(\ninstance, iree_make_cstring_view(\"local-task\"), &device);\n\n// Create a session to hold the module state.\niree_runtime_session_options_t session_options;\niree_runtime_session_options_initialize(&session_options);\niree_runtime_session_t* session = NULL;\niree_runtime_session_create_with_device(\ninstance, &session_options, device,\niree_runtime_instance_host_allocator(instance), &session);\n\n// Load the compiled user module from a file.\niree_runtime_session_append_bytecode_module_from_file(\nsession, \"program.vmfb\");\n\n// Build and issue the call.\niree_runtime_call_t call;\niree_runtime_call_initialize_by_name(\nsession, iree_make_cstring_view(\"module.entry_function_name\"), &call);\n// iree_runtime_call_inputs_push_back_buffer_view(...);\niree_runtime_call_invoke(&call, /*flags=*/0);\n\n// Retrieve the function outputs and clean up the call.\n// iree_runtime_call_outputs_pop_front_buffer_view(...);\niree_runtime_call_deinitialize(&call);\n\n// Cleanup state.\niree_runtime_session_release(session);\niree_hal_device_release(device);\niree_runtime_instance_release(instance);\n}\n
"},{"location":"reference/bindings/c-api/#samples_1","title":"Samples","text":"Project Source Description iree-org/iree-template-runtime-cmake hello_world.c
Runtime application template openxla/iree runtime/demo/
In-tree demos of the high level runtime API openxla/iree samples/
In-tree sample applications iree-org/iree-samples runtime-library/
Shared runtime library builderBuilds libireert.so
to aid development iml130/iree-template-cpp simple_embedding.c
Demo integration into a project"},{"location":"reference/bindings/c-api/#compiler-runtime-jit","title":"Compiler + Runtime = JIT","text":"The compiler and runtime APIs may be used together to build a \"just in time\" (JIT) execution engine. JIT compilation allows for last-minute specialization with no prior knowledge of target devices and avoids issues with version drift, but it can also constrain deployment options and usage scenarios.
"},{"location":"reference/bindings/python/","title":"Python bindings","text":"","tags":["Python"]},{"location":"reference/bindings/python/#overview","title":"Overview","text":"IREE offers Python bindings split into several packages, covering different components:
PIP package name Description iree-compiler
IREE's generic compiler tools and helpers iree-runtime
IREE's runtime, including CPU and GPU backends iree-tools-tf
Tools for importing from TensorFlow iree-tools-tflite
Tools for importing from TensorFlow Lite iree-jax
Tools for importing from JAX Collectively, these packages allow for importing from frontends, compiling towards various targets, and executing compiled code on IREE's backends.
","tags":["Python"]},{"location":"reference/bindings/python/#prerequisites","title":"Prerequisites","text":"To use IREE's Python bindings, you will first need to install Python 3 and pip, as needed.
Tip - Virtual environments We recommend using virtual environments to manage python packages, such as through venv
(about, tutorial):
Linux macOS Windows python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\n.venv\\Scripts\\activate.bat\n
When done, run deactivate
.
","tags":["Python"]},{"location":"reference/bindings/python/#installing-iree-packages","title":"Installing IREE packages","text":"","tags":["Python"]},{"location":"reference/bindings/python/#prebuilt-packages","title":"Prebuilt packages","text":"Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install \\\niree-compiler \\\niree-runtime\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade \\\niree-compiler \\\niree-runtime\n
","tags":["Python"]},{"location":"reference/bindings/python/#building-from-source","title":"Building from source","text":"See Building Python bindings page for instructions for building from source.
","tags":["Python"]},{"location":"reference/bindings/python/#usage","title":"Usage","text":"Info - API reference pages
API reference pages for IREE's runtime and compiler Python APIs are hosted on readthedocs.
Documentation for the MLIR compiler Python APIs can be found at https://mlir.llvm.org/docs/Bindings/Python/.
","tags":["Python"]},{"location":"reference/bindings/python/#compile-a-program","title":"Compile a program","text":"from iree import compiler as ireec\n\n# Compile a module.\nINPUT_MLIR = \"\"\"\nmodule @arithmetic {\n func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {\n %0 = arith.mulf %arg0, %arg1 : tensor<4xf32>\n return %0 : tensor<4xf32>\n }\n}\n\"\"\"\n\n# Compile using the vmvx (reference) target:\ncompiled_flatbuffer = ireec.tools.compile_str(\n INPUT_MLIR,\n target_backends=[\"vmvx\"])\n
","tags":["Python"]},{"location":"reference/bindings/python/#run-a-compiled-program","title":"Run a compiled program","text":"from iree import runtime as ireert\nimport numpy as np\n\n# Register the module with a runtime context.\n# Use the \"local-task\" CPU driver, which can load the vmvx executable:\nconfig = ireert.Config(\"local-task\")\nctx = ireert.SystemContext(config=config)\nvm_module = ireert.VmModule.copy_buffer(ctx.instance, compiled_flatbuffer)\nctx.add_vm_module(vm_module)\n\n# Invoke the function and print the result.\nprint(\"INVOKE simple_mul\")\narg0 = np.array([1., 2., 3., 4.], dtype=np.float32)\narg1 = np.array([4., 5., 6., 7.], dtype=np.float32)\nf = ctx.modules.arithmetic[\"simple_mul\"]\nresults = f(arg0, arg1).to_host()\nprint(\"Results:\", results)\n
","tags":["Python"]},{"location":"reference/bindings/python/#samples","title":"Samples","text":"Check out the samples in IREE's samples/colab/ directory and the iree-samples repository for examples using the Python APIs.
","tags":["Python"]},{"location":"reference/bindings/python/#console-scripts","title":"Console scripts","text":"The Python packages include console scripts for most of IREE's native tools like iree-compile
and iree-run-module
. After installing a package from pip, these should be added to your path automatically:
$ python -m pip install iree-runtime\n$ which iree-run-module\n\n/projects/.venv/Scripts/iree-run-module\n
","tags":["Python"]},{"location":"reference/bindings/python/#profiling","title":"Profiling","text":"The tools in the iree-runtime
package support variants:
Variant name Description default Standard runtime tools tracy Runtime tools instrumented using the Tracy profiler Switch between variants of the installed tools using the IREE_PY_RUNTIME
environment variable:
IREE_PY_RUNTIME=tracy iree-run-module ...\n
See the developer documentation page on Profiling with Tracy for information on using Tracy.
","tags":["Python"]},{"location":"reference/mlir-dialects/","title":"MLIR dialects","text":"These pages contain automatically generated documentation for the MLIR dialects defined in the IREE repository. IREE also makes extensive use of dialects from the upstream MLIR repository, which are documented at https://mlir.llvm.org/docs/Dialects/.
"},{"location":"reference/mlir-dialects/#iree-internal-dialects","title":"IREE internal dialects","text":"These dialects are an implementation detail of the IREE compiler, though they can be used by plugins and other advanced integrations. The sources for most of these dialects can be found in the iree/compiler/Dialect/
directory.
Dialect Description Check Defines assertions for IREE tests Flow Models execution data flow and partitioning HAL Represents operations against the IREE HAL1 HAL/Inline Inline HAL interop runtime module dialect HAL/Loader HAL inline executable loader runtime module dialect IO/Parameters External parameter resource management APIs Stream Model execution partitioning and scheduling Util Types and ops common across IREE subdialects VM Represents operations against an abstract virtual machine VMVX Virtual Machine Vector Extensions"},{"location":"reference/mlir-dialects/#iree-public-dialects","title":"IREE public dialects","text":"The ops in these dialects are legal to include in compiler inputs. The sources for these dialects can be found in the llvm-external-projects/iree-dialects/
directory that is designed to be used from other projects via LLVM's external projects mechanism.
Dialect Description IREEInput Structural ops legal as input to IREE's compiler IREELinalgExt Extensions to the Linalg dialect for specific operations IREEVectorExt Extensions to the Vector dialect for specific operations -
Hardware Abstraction Layer\u00a0\u21a9
"},{"location":"reference/mlir-dialects/Check/","title":"Check","text":""},{"location":"reference/mlir-dialects/Check/#check-dialect","title":"'check' Dialect","text":"A dialect implementing test assertions for IREE modules.
- 'check' Dialect
- Operation definition
- check.expect_all_true (Check::ExpectAllTrueOp)
- check.expect_almost_eq (Check::ExpectAlmostEqOp)
- check.expect_almost_eq_const (Check::ExpectAlmostEqConstOp)
- check.expect_eq (Check::ExpectEqOp)
- check.expect_eq_const (Check::ExpectEqConstOp)
- check.expect_false (Check::ExpectFalseOp)
- check.expect_true (Check::ExpectTrueOp)
"},{"location":"reference/mlir-dialects/Check/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Check/#checkexpect_all_true-checkexpectalltrueop","title":"check.expect_all_true
(Check::ExpectAllTrueOp)","text":"Checks that the operand contains only values that are true
Syntax:
operation ::= `check.expect_all_true` (`` `<` $device^ `>`)?\n `` `(` $operand `)` attr-dict `:` type($operand)\n
Verifies that the operand contains true values, which are represented by any non-zero integer.
Issues a non-fatal failure if the verification fails.
check.expect_all_true<%device>(%arg0) : !hal.buffer_view\ncheck.expect_all_true(%arg1) : tensor<2x2xi32>\n
"},{"location":"reference/mlir-dialects/Check/#operands","title":"Operands:","text":"Operand Description device
device operand
buffer_view or tensor of signless integer values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq-checkexpectalmosteqop","title":"check.expect_almost_eq
(Check::ExpectAlmostEqOp)","text":"Checks that the operands are almost equal
Syntax:
operation ::= `check.expect_almost_eq` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n
Verifies that the buffer view or tensor operands with float elements are almost equal to within an implementation-defined \"reasonable\" tolerance.
Issues a non-fatal failure if the verification fails.
check.expect_almost_eq(%arg0, %arg1) : tensor<5xf32>\n
"},{"location":"reference/mlir-dialects/Check/#operands_1","title":"Operands:","text":"Operand Description device
device lhs
buffer_view or tensor of floating-point values rhs
buffer_view or tensor of floating-point values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq_const-checkexpectalmosteqconstop","title":"check.expect_almost_eq_const
(Check::ExpectAlmostEqConstOp)","text":"Checks that the tensor operand is almost equal to some constant
Syntax:
operation ::= `check.expect_almost_eq_const` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n
Verifies that the tensor operand with float elements is almost equal to the constant attribute within an implementation-defined \"reasonable\" tolerance.
Issues a non-fatal failure if the verification fails.
This op is just a convenience wrapper around the expect_almost_eq op.
check.expect_almost_eq_const(%const0, dense<[0.999999, 2.0]> : tensor<5xf32>) : tensor<5xf32>\n
"},{"location":"reference/mlir-dialects/Check/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_2","title":"Operands:","text":"Operand Description device
device lhs
tensor of floating-point values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq-checkexpecteqop","title":"check.expect_eq
(Check::ExpectEqOp)","text":"Checks that the tensor or buffer view operands are equal
Syntax:
operation ::= `check.expect_eq` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n
Verifies that the operands are exactly equal.
Issues a non-fatal failure if the verification fails.
check.expect_eq(%arg0, %arg1) : tensor<5xi32>\n
"},{"location":"reference/mlir-dialects/Check/#operands_3","title":"Operands:","text":"Operand Description device
device lhs
buffer_view or tensor of any type values rhs
buffer_view or tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq_const-checkexpecteqconstop","title":"check.expect_eq_const
(Check::ExpectEqConstOp)","text":"Checks that the tensor operand is equal to some constant
Syntax:
operation ::= `check.expect_eq_const` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n
Verifies that the tensor operand is exactly equal to a constant attribute.
Issues a non-fatal failure if the verification fails.
This op is just a convenience wrapper around the expect_eq op.
check.expect_eq_const(%arg0, dense<[1, 2]> : tensor<2xi32>) : tensor<2xi32>\n
"},{"location":"reference/mlir-dialects/Check/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_4","title":"Operands:","text":"Operand Description device
device lhs
tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_false-checkexpectfalseop","title":"check.expect_false
(Check::ExpectFalseOp)","text":"Checks that the operand is false
Syntax:
operation ::= `check.expect_false` `(` $operand `)` attr-dict `:` type($operand)\n
Verifies that the operand contains a false value, which is represented by zero.
Issues a non-fatal failure if the verification fails.
check.expect_false(%arg0) : i32\n
"},{"location":"reference/mlir-dialects/Check/#operands_5","title":"Operands:","text":"Operand Description operand
signless integer"},{"location":"reference/mlir-dialects/Check/#checkexpect_true-checkexpecttrueop","title":"check.expect_true
(Check::ExpectTrueOp)","text":"Checks that the operand is true
Syntax:
operation ::= `check.expect_true` `(` $operand `)` attr-dict `:` type($operand)\n
Verifies that the operand contains a true value, which is represented by any non-zero integer.
Issues a non-fatal failure if the verification fails.
check.expect_true(%arg0) : i32\n
"},{"location":"reference/mlir-dialects/Check/#operands_6","title":"Operands:","text":"Operand Description operand
signless integer"},{"location":"reference/mlir-dialects/Flow/","title":"Flow","text":""},{"location":"reference/mlir-dialects/Flow/#flow-dialect","title":"'flow' Dialect","text":"A dialect designed to model execution data flow and partitioning.
The flow dialect is used to model regions of dense computation and the data flow between them. MLIR value-semantic tensors are used as the primary data type to allow SSA use-def to provide a bulk of the infrastructure required to perform the computation partitioning and outlining.
The dialect is designed to ingest relatively high-level linear algebra via XLA HLO ops (that also operate on the value-semantic tensor types) and optionally MLIR standard ops for control flow and other actions. After conversion of any higher-level ops that have special semantics in the flow dialect, such as global variables, the rest are partitioned into regions containing simple and compatible computations. Finally, outlining moves the computations into executables and leaves only the execution flow encoded via dispatch operations.
The primary unit of interest is a \"dispatch region\" containing compatible computations that can be scheduled together efficiently (and safely). \"Compatible\" here is specified as similarly shaped workloads that indicate how many invocations a computation can be parallelized across when running in a SPMD execution model. Though it depends on the particular runtime backends this more concretely means things like the untiled workload (or tiled workgroups) used in GPU dispatches or similar thread pool executors.
After identification of the dispatchable regions a set of transformations performs folding and simplification to reduce the total number of dispatches. Heuristics are used in certain cases to more efficiently schedule special ops (such as GEMM) and the design is amenable to profile-guided analysis that can be added in the future.
The resulting outlined executable modules containing the dispatchable code can be translated to one or more backends (such as SPIR-V for Vulkan, or LLVM IR for running on the CPU, etc). The IR that is outlined is untouched and in the input format (such as XLA HLO ops) allowing conversion using any MLIR target that supports ingesting such input. A few special ops are used to communicate statically available information such as the expected workload size, shapes of inputs and outputs, etc.
- 'flow' Dialect
- Operation definition
- Collective communication ops
- flow.channel.count (Flow::ChannelCountOp)
- flow.channel.default (Flow::ChannelDefaultOp)
- flow.channel.rank (Flow::ChannelRankOp)
- flow.channel.split (Flow::ChannelSplitOp)
- flow.collective.all_gather (Flow::CollectiveAllGatherOp)
- flow.collective.all_reduce (Flow::CollectiveAllReduceOp)
- flow.collective.all_to_all (Flow::CollectiveAllToAllOp)
- flow.collective.reduce_scatter (Flow::CollectiveReduceScatterOp)
- flow.collective.send_recv (Flow::CollectiveSendRecvOp)
- Dispatch ops
- flow.dispatch (Flow::DispatchOp)
- Executable ops
- flow.executable_end (Flow::ExecutableEndOp)
- flow.executable.export (Flow::ExecutableExportOp)
- flow.executable (Flow::ExecutableOp)
- Partitioned region ops
- flow.dispatch.region (Flow::DispatchRegionOp)
- flow.dispatch.tensor.load (Flow::DispatchTensorLoadOp)
- flow.dispatch.tensor.store (Flow::DispatchTensorStoreOp)
- flow.dispatch.tie_shape (Flow::DispatchTieShapeOp)
- flow.dispatch.workgroup.count (Flow::DispatchWorkgroupCountOp)
- flow.dispatch.workgroup.id (Flow::DispatchWorkgroupIDOp)
- flow.dispatch.workgroup.size (Flow::DispatchWorkgroupSizeOp)
- flow.dispatch.workgroups (Flow::DispatchWorkgroupsOp)
- flow.return (Flow::ReturnOp)
- Streamable call ops
- flow.call (Flow::CallOp)
- flow.func (Flow::FuncOp)
- Tensor ops
- flow.dispatch.workgroup_count_from_dag_root (Flow::DispatchWorkgroupCountFromDagRootOp)
- flow.dispatch.workgroup_count_from_slice (Flow::DispatchWorkgroupCountFromSliceOp)
- flow.dispatch.workload.ordinal (Flow::DispatchWorkloadOrdinalOp)
- flow.tensor.alloca (Flow::TensorAllocaOp)
- flow.tensor.bitcast (Flow::TensorBitCastOp)
- flow.tensor.clone (Flow::TensorCloneOp)
- flow.tensor.constant (Flow::TensorConstantOp)
- flow.tensor.empty (Flow::TensorEmptyOp)
- flow.tensor.load (Flow::TensorLoadOp)
- flow.tensor.reshape (Flow::TensorReshapeOp)
- flow.tensor.slice (Flow::TensorSliceOp)
- flow.tensor.splat (Flow::TensorSplatOp)
- flow.tensor.store (Flow::TensorStoreOp)
- flow.tensor.tie_shape (Flow::TensorTieShapeOp)
- flow.tensor.trace (Flow::TensorTraceOp)
- flow.tensor.update (Flow::TensorUpdateOp)
- Attribute definition
- DummyAttr
- Type constraint definition
- dispatch.tensor
- dispatch.tensor
- dispatch.tensor
- Type definition
- ChannelType
- DummyType
"},{"location":"reference/mlir-dialects/Flow/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Flow/#collective-communication-ops","title":"Collective communication ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowchannelcount-flowchannelcountop","title":"flow.channel.count
(Flow::ChannelCountOp)","text":"Returns the total number of participants in the group
Syntax:
operation ::= `flow.channel.count` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the total participant count in the collective communicator group.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowchanneldefault-flowchanneldefaultop","title":"flow.channel.default
(Flow::ChannelDefaultOp)","text":"Returns a default collective communication channel
Syntax:
operation ::= `flow.channel.default` ($group^)?\n `:` type($result)\n attr-dict-with-keyword\n
Returns a channel initialized using the runtime environment.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription group
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#results_1","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#flowchannelrank-flowchannelrankop","title":"flow.channel.rank
(Flow::ChannelRankOp)","text":"Returns the rank of the local participant in the group
Syntax:
operation ::= `flow.channel.rank` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the rank the channel represents as a participant in a collective group in [0, count)
.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_1","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowchannelsplit-flowchannelsplitop","title":"flow.channel.split
(Flow::ChannelSplitOp)","text":"Splits a collective communication channel
Syntax:
operation ::= `flow.channel.split` $channel `,` $color `,` $key\n `:` type($channel) `->` type($result)\n attr-dict-with-keyword\n
Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group.
Interfaces: InferTypeOpInterface, OpAsmOpInterface
"},{"location":"reference/mlir-dialects/Flow/#operands_2","title":"Operands:","text":"Operand Description channel
a collective communication channel color
index key
index"},{"location":"reference/mlir-dialects/Flow/#results_3","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_gather-flowcollectiveallgatherop","title":"flow.collective.all_gather
(Flow::CollectiveAllGatherOp)","text":"Performs all-gather operation
Syntax:
operation ::= `flow.collective.all_gather` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
It gathers data from all ranks and concatenates them on the 0-th dimension. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_3","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_4","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_reduce-flowcollectiveallreduceop","title":"flow.collective.all_reduce
(Flow::CollectiveAllReduceOp)","text":"Performs all-reduce operation
Syntax:
operation ::= `flow.collective.all_reduce` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
The operation reduces data across all the ranks in the channel. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription reduction_op
mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_4","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_5","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_to_all-flowcollectivealltoallop","title":"flow.collective.all_to_all
(Flow::CollectiveAllToAllOp)","text":"Performs all-to-all operation
Syntax:
operation ::= `flow.collective.all_to_all` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
This operation mutually exchanges data across all of the ranks in the channel. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_5","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_6","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivereduce_scatter-flowcollectivereducescatterop","title":"flow.collective.reduce_scatter
(Flow::CollectiveReduceScatterOp)","text":"Performs reduce and scatter operations
Syntax:
operation ::= `flow.collective.reduce_scatter` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
The operation reduces data across all the ranks in the channel and scatters the result to each rank. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription reduction_op
mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_6","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_7","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivesend_recv-flowcollectivesendrecvop","title":"flow.collective.send_recv
(Flow::CollectiveSendRecvOp)","text":"Performs a grouped send and receive operation
Syntax:
operation ::= `flow.collective.send_recv` $element_type `,` $target `,` $source `,` $channel `,` $send `,` $recv `:`\n `(` type($target) `,` type($source) `,` type($channel) `,` type($send) `,` type($recv) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
The operation sends data to the rank specified by send and receives data from the rank specified by recv. If send is -1, this rank will not send any data. If recv is -1, this rank will not receive any data and the output will be all zeros. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_7","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel send
index recv
index"},{"location":"reference/mlir-dialects/Flow/#results_8","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatch-flowdispatchop","title":"flow.dispatch
(Flow::DispatchOp)","text":"A dispatch of workgroups across a grid
Syntax:
operation ::= `flow.dispatch` custom<DispatchEntryPoints>($entry_points)\n (`[` $workload^ `]`)? ``\n `(` $arguments `)` attr-dict `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n
Dispatches workgroups across a grid defined by the captured workload parameters carrying the information required to compute the workgroup count at runtime. The function for converting the workload into a 3D workgroup count is attached to the dispatch entry point and may contain arbitrary host logic.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), SymbolUserOpInterface, TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_points
::mlir::ArrayAttrsymbol ref array attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_8","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_9","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#executable-ops","title":"Executable ops","text":"Executables for outlined regions.
"},{"location":"reference/mlir-dialects/Flow/#flowexecutable_end-flowexecutableendop","title":"flow.executable_end
(Flow::ExecutableEndOp)","text":"Terminator pseudo-op for the executable op
Syntax:
operation ::= `flow.executable_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/Flow/#flowexecutableexport-flowexecutableexportop","title":"flow.executable.export
(Flow::ExecutableExportOp)","text":"
Defines an executable entry point for dispatch operations
Syntax:
operation ::= `flow.executable.export` custom<SymbolVisibility>($sym_visibility)\n custom<SymbolAlias>($sym_name, $function_ref)\n custom<WorkgroupCountRegion>($workgroup_count)\n attr-dict-with-keyword\n
Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal function.
Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Flow/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute function_ref
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Flow/#flowexecutable-flowexecutableop","title":"flow.executable
(Flow::ExecutableOp)","text":"Generic executable module
Syntax:
operation ::= `flow.executable` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
An executable module containing one or more public functions. The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Flow/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#partitioned-region-ops","title":"Partitioned region ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchregion-flowdispatchregionop","title":"flow.dispatch.region
(Flow::DispatchRegionOp)","text":"A group of ops
This op is a container/grouping of ops. It represents a fusion group before being lowered to a dispatch region. Ops are collected inside of the region body of the op. Values from parent regions can be captured. Results are yielded with a return
terminator and returned from this op.
dispatch.region
ops are lowered to dispatch.workgroups
ops. Workgroups isolated from above. dispatch.region
ops are a more lightweight abstraction for implementing fusion heuristics, i.e., the process of deciding which ops should form a dispatch region.
This op also has a second region: workload_count
. The arguments to the region represent the workload for the dispatch, and returns the number of workgroups for the dispatch. The region is lowered directly to workload_count
region of dispatch.workgroups
.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_9","title":"Operands:","text":"Operand Description result_dims
variadic of index workload
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_10","title":"Results:","text":"Result Description result
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorload-flowdispatchtensorloadop","title":"flow.dispatch.tensor.load
(Flow::DispatchTensorLoadOp)","text":"Loads a tensor from a dispatch input placeholder
Syntax:
operation ::= `flow.dispatch.tensor.load` $source\n `,` `offsets` `=` custom<DynamicIndexList>(\n $offsets, $static_offsets)\n `,` `sizes` `=` custom<DynamicIndexList>(\n $sizes, $static_sizes)\n `,` `strides` `=` custom<DynamicIndexList>(\n $strides, $static_strides)\n attr-dict `:` type($source) (`{` $source_dims^ `}`)? `->` type($result)\n
Loads an input tensor or subtensor from an input placeholder. As each workgroup executes concurrently all workgroups will receive identical loaded results of regions that may overlap.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OffsetSizeAndStrideOpInterface, ReifyRankedShapedTypeOpInterface, TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription static_offsets
::mlir::DenseI64ArrayAttri64 dense array attribute static_sizes
::mlir::DenseI64ArrayAttri64 dense array attribute static_strides
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_10","title":"Operands:","text":"Operand Description source
dispatch.tensor source_dims
variadic of index offsets
variadic of index sizes
variadic of index strides
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_11","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorstore-flowdispatchtensorstoreop","title":"flow.dispatch.tensor.store
(Flow::DispatchTensorStoreOp)","text":"Stores a tensor into a dispatch output placeholder
Syntax:
operation ::= `flow.dispatch.tensor.store` $value `,` $target\n `,` `offsets` `=` custom<DynamicIndexList>(\n $offsets, $static_offsets)\n `,` `sizes` `=` custom<DynamicIndexList>(\n $sizes, $static_sizes)\n `,` `strides` `=` custom<DynamicIndexList>(\n $strides, $static_strides)\n attr-dict `:` type($value) `->` type($target) (`{` $target_dims^ `}`)?\n
Stores a tensor or subtensor into an output tensor placeholder. As each workgroup executes concurrently behavior is undefined if more than one workgroup stores into overlapping regions of the full output tensor.
Traits: AttrSizedOperandSegments
Interfaces: OffsetSizeAndStrideOpInterface, Util_ShapeAwareOp
"},{"location":"reference/mlir-dialects/Flow/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription static_offsets
::mlir::DenseI64ArrayAttri64 dense array attribute static_sizes
::mlir::DenseI64ArrayAttri64 dense array attribute static_strides
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_11","title":"Operands:","text":"Operand Description value
ranked tensor of any type values target
dispatch.tensor target_dims
variadic of index offsets
variadic of index sizes
variadic of index strides
variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtie_shape-flowdispatchtieshapeop","title":"flow.dispatch.tie_shape
(Flow::DispatchTieShapeOp)","text":"Ties a runtime shape to a dispatch I/O argument
Syntax:
operation ::= `flow.dispatch.tie_shape` $operand attr-dict\n `:` type($result) (`{` $dynamic_dims^ `}`)?\n
Metadata op used to tie a runtime-computed shape with dynamic dimensions to a dispatch input/output argument. All uses of the argument should use the pass-through result of this op to allow for SSA-based shape resolution.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_12","title":"Operands:","text":"Operand Description operand
dispatch.tensor dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_12","title":"Results:","text":"Result Description result
dispatch.tensor"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupcount-flowdispatchworkgroupcountop","title":"flow.dispatch.workgroup.count
(Flow::DispatchWorkgroupCountOp)","text":"Returns the total workgroup count of the grid
Syntax:
operation ::= `flow.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n
The total number of workgroups along each dimension in the dispatch grid.
Represented as a 3D grid classically written as XYZ. Corresponds to the NumWorkgroups
SPIR-V built-in and the gridDim
CUDA built-in variable.
%x = flow.dispatch.workgroup.count[0] : index\n%y = flow.dispatch.workgroup.count[1] : index\n%z = flow.dispatch.workgroup.count[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_13","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupid-flowdispatchworkgroupidop","title":"flow.dispatch.workgroup.id
(Flow::DispatchWorkgroupIDOp)","text":"Returns the index of the current workgroup in the grid
Syntax:
operation ::= `flow.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n
The global workgroup ID of the current workgroup in the range of [0, flow.dispatch.workgroup.count)
along each dimension.
Represented as a 3D grid classically written as XYZ. Corresponds to the WorkgroupId
SPIR-V built-in and the blockIdx
CUDA built-in variable.
%x = flow.dispatch.workgroup.id[0] : index\n%y = flow.dispatch.workgroup.id[1] : index\n%z = flow.dispatch.workgroup.id[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_14","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupsize-flowdispatchworkgroupsizeop","title":"flow.dispatch.workgroup.size
(Flow::DispatchWorkgroupSizeOp)","text":"Returns the size of each workgroup in invocations
Syntax:
operation ::= `flow.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n
The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.
Workgroup sizes are not determined at the flow dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.
Represented as a 3D grid classically written as XYZ. Corresponds to the WorkgroupSize
SPIR-V built-in and the blockDim
CUDA built-in variable.
%x = flow.dispatch.workgroup.size[0] : index\n%y = flow.dispatch.workgroup.size[1] : index\n%z = flow.dispatch.workgroup.size[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_15","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroups-flowdispatchworkgroupsop","title":"flow.dispatch.workgroups
(Flow::DispatchWorkgroupsOp)","text":"A dispatch of workgroups across a 3-dimensional grid
Syntax:
operation ::= `flow.dispatch.workgroups` (`[` $workload^ `]`)? ``\n `(` $arguments `)` `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n attr-dict-with-keyword\n `=` `\\n` ` ` ` ` ` `\n custom<DispatchWorkgroupBody>(ref(type($arguments)),\n ref(type($results)),\n $workgroup_body)\n `` custom<DispatchWorkgroupsCountRegion>($workgroup_count)\n
Dispatches some number of workgroups across a 3-dimensional grid. The body region will be invoked for each workgroup with a unique flow.dispatch.workgroup.id
in the range of [0, flow.dispatch.workgroup.count)
(along each dimension XYZ).
From the outside the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors.
Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the flow.dispatch.workgroup.*
ops to query the workgroup ID/count/size instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc).
Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it.
%r = flow.dispatch.workgroups[%c5, %c5](%0, %1)\n : (tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32> =\n (%arg0: !flow.dispatch.tensor<readonly:tensor<5x5xf32>>,\n %arg1: !flow.dispatch.tensor<readonly:tensor<5xf32>>,\n %arg2: !flow.dispatch.tensor<writeonly:tensor<5x5xf32>>) {\n ...\n}\n
The number of results of the operation is equal to the number of results in the type signature ((tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32>
). Each tensor argument and result in the type signature has a corresponding block argument of type !flow.dispatch.tensor
. Furthermore, each argument has a corresponding arguments
operand.
There are no arguments
operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, -> %0
would tie the first argument to the result. In that case, there would be no separate block argument for the result.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, IsolatedFromAbove
Interfaces: ClosureOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_13","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_16","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowreturn-flowreturnop","title":"flow.return
(Flow::ReturnOp)","text":"Return from a flow.dispatch_region
Syntax:
operation ::= `flow.return` attr-dict ($operands^ `:` type($operands))?\n
Returns the given values from the region and back to the host code.
Traits: AlwaysSpeculatableImplTrait, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_14","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#streamable-call-ops","title":"Streamable call ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowcall-flowcallop","title":"flow.call
(Flow::CallOp)","text":"Calls a streamable external host function
Syntax:
operation ::= `flow.call` $callee\n `(` $arguments `)` attr-dict `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n
Calls a function taking/returning tensor values with stream semantics. Tensors have their shapes captured and may be tied to denote in-place operations. Asynchronous calls must have no side-effects.
Note that returned tensors must have their shapes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the shape (avoid at all costs!) a separate func.call can be used outside of the stream to do so. If shapes are unknowable until the operation is performed it should be made as a normal asynchronous host call with 'coarse-fences' instead.
Traits: AttrSizedOperandSegments
Interfaces: CallOpInterface, FLOW_StreamableOp, SymbolUserOpInterface, TiedOpInterface, Util_ShapeAwareOp
"},{"location":"reference/mlir-dialects/Flow/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
::mlir::FlatSymbolRefAttrflat symbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_15","title":"Operands:","text":"Operand Description arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_17","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowfunc-flowfuncop","title":"flow.func
(Flow::FuncOp)","text":"Streamable function declaration
Syntax:
operation ::= `flow.func` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n ``\n custom<ShapedFunctionSignature>($function_type,\n $tied_operands,\n $arg_attrs,\n $res_attrs)\n attr-dict-with-keyword\n ($body^)?\n
Declares a function that can be called as an asynchronous streaming operation via flow.call
. Today only external functions are allowed.
Traits: IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/Flow/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type tied_operands
::mlir::ArrayAttr64-bit integer array attribute sym_visibility
::mlir::StringAttrstring attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Flow/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_dag_root-flowdispatchworkgroupcountfromdagrootop","title":"flow.dispatch.workgroup_count_from_dag_root
(Flow::DispatchWorkgroupCountFromDagRootOp)","text":"Workgroup count computed based on iteration range of the root of the DAG for ops within the dispatch.
Syntax:
operation ::= `flow.dispatch.workgroup_count_from_dag_root` attr-dict $operands\n
When using tile + distribution of the root of the DAG (Directed Acyclic Graph) of ops within the dispatch to split the work amongst workgroups. The workload captured is the size of the iteration space of the root of the DAG. This op represents the computation that given the workload returns the number of workgroups to use. The backends are responsible for lowering this op into actual computation (typically based on the tile sizes used to tile and distribute the root of the DAG).
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_16","title":"Operands:","text":"Operand Description operands
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_18","title":"Results:","text":"Result Description x
index y
index z
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_slice-flowdispatchworkgroupcountfromsliceop","title":"flow.dispatch.workgroup_count_from_slice
(Flow::DispatchWorkgroupCountFromSliceOp)","text":"Placeholder to signify default workgroup count calculation.
Syntax:
operation ::= `flow.dispatch.workgroup_count_from_slice` attr-dict $operands\n
The default computation of the number of workgroups (or workgroup count) assumes that the dispatch + captured values is enough to compute the workgroup count. It does so by using a program slice of the values within the dispatch that represent the number of workgroups when available within the dispatch. Currently the arguments of index types captured by the flow.dispatch.workgroups
are treated as the workload for the operation. It is a requirement that the slice of the program that computes the number of workgroups will need to have its leaves be these captured values.
TODO: This could be generalized in future to allow the slices to encompass arbitrary computation. The computation of the workgroup count can then be done on the device itself, if this is data dependent. In such cases the workload could be more than just values of index types.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_17","title":"Operands:","text":"Operand Description operands
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_19","title":"Results:","text":"Result Description x
index y
index z
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkloadordinal-flowdispatchworkloadordinalop","title":"flow.dispatch.workload.ordinal
(Flow::DispatchWorkloadOrdinalOp)","text":"Annotates the values captured as workload within the body of flow.dispatch.workgroups
op.
Syntax:
operation ::= `flow.dispatch.workload.ordinal` attr-dict $operand `,` $ordinal `:` type($operand)\n
The arguments that represent the captured/returned values of the flow.dispatch.workgroups op, i.e. the signature of the body of the op, are not preserved during IREE's compilation. Since the workloads are derived from the operands captured by the operation, this op denotes the values captured as workloads. This can be used in the backends to map back to the workload values while materializing the workgroup count computation.
TODO: Find a better way to represent this information, either by somehow propagating the signature of the created dispatch workgroup op through the compilation stack until the codegen backends, or as a separate list/attribute that can be plumbed through without using explicit ops.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription ordinal
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#operands_18","title":"Operands:","text":"Operand Description operand
index"},{"location":"reference/mlir-dialects/Flow/#results_20","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowtensoralloca-flowtensorallocaop","title":"flow.tensor.alloca
(Flow::TensorAllocaOp)","text":"An empty tensor allocation with undefined contents
Syntax:
operation ::= `flow.tensor.alloca` `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a new transient tensor allocation with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read. The resulting tensor may be long-lived and allocated as part of a dedicated allocation. Prefer using flow.tensor.empty
whenever possible as this op disables nearly all allocation-related optimizations performed by the compiler. The presence of this op is often an indication of an improper lowering.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Flow/#operands_19","title":"Operands:","text":"Operand Description result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_21","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorbitcast-flowtensorbitcastop","title":"flow.tensor.bitcast
(Flow::TensorBitCastOp)","text":"Bitcasts a tensor
Syntax:
operation ::= `flow.tensor.bitcast` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Bitcasts a tensor to a new type without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_20","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_22","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorclone-flowtensorcloneop","title":"flow.tensor.clone
(Flow::TensorCloneOp)","text":"Performs a full tensor clone operation
Syntax:
operation ::= `flow.tensor.clone` $operand `:` type($result) (`{` $argument_dims^ `}`)?\n attr-dict-with-keyword\n
Clones the input tensor into an identical output tensor.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_21","title":"Operands:","text":"Operand Description operand
ranked tensor of any type values argument_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_23","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorconstant-flowtensorconstantop","title":"flow.tensor.constant
(Flow::TensorConstantOp)","text":"Tensor constant that can have dynamic dimensions
Syntax:
operation ::= `flow.tensor.constant` $value attr-dict `->` type($result)\n
Allows specifying a constant where the return value can erase shape information. This operation is declared as having side effects and has no folder, so will not be optimized away by the compiler. The underlying shape information should be hidden from the compiler and resolved at runtime.
%c = flow.tensor.constant tensor<2x2xf32> -> tensor<?x?xf32>\n%res = math.absf %c : tensor<?x?xf32>\n
"},{"location":"reference/mlir-dialects/Flow/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Flow/#results_24","title":"Results:","text":"Result Description result
tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorempty-flowtensoremptyop","title":"flow.tensor.empty
(Flow::TensorEmptyOp)","text":"An empty tensor carrying metadata but no contents
Syntax:
operation ::= `flow.tensor.empty` `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_22","title":"Operands:","text":"Operand Description result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_25","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorload-flowtensorloadop","title":"flow.tensor.load
(Flow::TensorLoadOp)","text":"Loads a value from a tensor element
Syntax:
operation ::= `flow.tensor.load` $source (`[` $indices^ `]`)? `:`\n type($source) (`{` $source_dims^ `}`)?\n attr-dict-with-keyword\n
Returns the element at the given location from within the tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_23","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_26","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorreshape-flowtensorreshapeop","title":"flow.tensor.reshape
(Flow::TensorReshapeOp)","text":"Reshapes a tensor
Syntax:
operation ::= `flow.tensor.reshape` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Reshapes a tensor to a new shape without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_24","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_27","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorslice-flowtensorsliceop","title":"flow.tensor.slice
(Flow::TensorSliceOp)","text":"Slices out a subregion of a tensor
Syntax:
operation ::= `flow.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Clones a subregion of a tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_25","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index start_indices
variadic of index lengths
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_28","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorsplat-flowtensorsplatop","title":"flow.tensor.splat
(Flow::TensorSplatOp)","text":"Splats a value into a shaped tensor
Syntax:
operation ::= `flow.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor initialized to the given primitive value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_26","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_29","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorstore-flowtensorstoreop","title":"flow.tensor.store
(Flow::TensorStoreOp)","text":"Stores a value into a tensor element
Syntax:
operation ::= `flow.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n type($target) (`{` $target_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor with the element at the given index set to the given value.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_27","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type or vector of any type values target
ranked tensor of any type values target_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_30","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortie_shape-flowtensortieshapeop","title":"flow.tensor.tie_shape
(Flow::TensorTieShapeOp)","text":"Ties a runtime shape to a tensor value
Syntax:
operation ::= `flow.tensor.tie_shape` $operand attr-dict\n `:` type($result) (`{` $dynamic_dims^ `}`)?\n
Metadata op used to tie tensors with their runtime-computed dynamic dimensions. This only exists transiently in the IR as a witness to shape calculations and is removed during lowering.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_28","title":"Operands:","text":"Operand Description operand
ranked tensor of any type values dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_31","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortrace-flowtensortraceop","title":"flow.tensor.trace
(Flow::TensorTraceOp)","text":"Traces one or more tensor values at runtime
Syntax:
operation ::= `flow.tensor.trace` $key `=` `[`\n custom<ShapedOperandList>($values, type($values), $value_dims)\n `]` attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.
Traits: AttrSizedOperandSegments
Interfaces: ShapeAwareOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#operands_29","title":"Operands:","text":"Operand Description values
variadic of ranked tensor of any type values value_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowtensorupdate-flowtensorupdateop","title":"flow.tensor.update
(Flow::TensorUpdateOp)","text":"Updates a tensor with the contents of another tensor
Syntax:
operation ::= `flow.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n type($update) (`{` $update_dims^ `}`)? `->`\n custom<ShapedTiedResult>(type($result), $target_dims)\n attr-dict-with-keyword\n
Updates the target tensor with the contents of the update tensor at the given offset indices.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_30","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index start_indices
variadic of index update
ranked tensor of any type values update_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_32","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/Flow/#dummyattr","title":"DummyAttr","text":"Syntax: #flow.dummy
"},{"location":"reference/mlir-dialects/Flow/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/Flow/#dispatchtensor","title":"dispatch.tensor","text":"A placeholder for a dispatch region input/output operand. This can be used to query the metadata about the tensor (such as its shape) as well as both load and store from the backing tensor representation.
"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_1","title":"dispatch.tensor","text":"A placeholder for a dispatch region input operand. This can be used to query the metadata about the tensor (such as its shape) as well as load from the backing tensor representation.
"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_2","title":"dispatch.tensor","text":"A placeholder for a dispatch region output operand. This can be used to query the metadata about the tensor (such as its shape) as well as store to the backing tensor representation.
"},{"location":"reference/mlir-dialects/Flow/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/Flow/#channeltype","title":"ChannelType","text":"a collecive communication channel
Syntax: !flow.channel
Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.
In programs that have already been partitioned prior to being compiled there will often exist only one channel and flow.channel.default
can be used to reference it. In programs that model SPMD behavior internally channels can be created or provided by hosting applications.
"},{"location":"reference/mlir-dialects/Flow/#dummytype","title":"DummyType","text":"Syntax: !flow.dummy
"},{"location":"reference/mlir-dialects/HAL/","title":"HAL","text":""},{"location":"reference/mlir-dialects/HAL/#hal-dialect","title":"'hal' Dialect","text":"A dialect representing operations against the IREE HAL.
This can be thought of as a Vulkan-like model with all of the graphics bits chopped out.
The type set is limited to those that can be represented in the IREE HAL design: buffers and views, synchronization primitives like semaphores, and command buffers. The intent is that if a device could implement the HAL interface the sequencer ops could run on that device, such as being able to run on a GPU via indirect command buffers.
Though this is mostly a 1:1 mapping to the iree::hal API there are some methods omitted as they are not likely to be needed in IR. It's assumed that either sequencer interfaces will encapsulate the logic (such as device resolution) or that certain features are unsafe to expose to user-defined input.
- 'hal' Dialect
- Operation definition
- Allocator ops
- hal.allocator.allocate (HAL::AllocatorAllocateOp)
- hal.allocator.import (HAL::AllocatorImportOp)
- Buffer ops
- hal.buffer.assert (HAL::BufferAssertOp)
- hal.buffer.length (HAL::BufferLengthOp)
- hal.buffer.load (HAL::BufferLoadOp)
- hal.buffer.store (HAL::BufferStoreOp)
- hal.buffer.subspan (HAL::BufferSubspanOp)
- Buffer view ops
- hal.buffer_view.assert (HAL::BufferViewAssertOp)
- hal.buffer_view.buffer (HAL::BufferViewBufferOp)
- hal.buffer_view.create (HAL::BufferViewCreateOp)
- hal.buffer_view.dim (HAL::BufferViewDimOp)
- hal.buffer_view.element_type (HAL::BufferViewElementTypeOp)
- hal.buffer_view.encoding_type (HAL::BufferViewEncodingTypeOp)
- hal.buffer_view.rank (HAL::BufferViewRankOp)
- hal.buffer_view.trace (HAL::BufferViewTraceOp)
- Channel ops
- hal.channel.create (HAL::ChannelCreateOp)
- hal.channel.rank_and_count (HAL::ChannelRankAndCountOp)
- hal.channel.split (HAL::ChannelSplitOp)
- Command buffer ops
- hal.command_buffer.begin_debug_group (HAL::CommandBufferBeginDebugGroupOp)
- hal.command_buffer.collective (HAL::CommandBufferCollectiveOp)
- hal.command_buffer.copy_buffer (HAL::CommandBufferCopyBufferOp)
- hal.command_buffer.create (HAL::CommandBufferCreateOp)
- hal.command_buffer.device (HAL::CommandBufferDeviceOp)
- hal.command_buffer.dispatch.indirect (HAL::CommandBufferDispatchIndirectOp)
- hal.command_buffer.dispatch.indirect.symbol (HAL::CommandBufferDispatchIndirectSymbolOp)
- hal.command_buffer.dispatch (HAL::CommandBufferDispatchOp)
- hal.command_buffer.dispatch.symbol (HAL::CommandBufferDispatchSymbolOp)
- hal.command_buffer.end_debug_group (HAL::CommandBufferEndDebugGroupOp)
- hal.command_buffer.execution_barrier (HAL::CommandBufferExecutionBarrierOp)
- hal.command_buffer.fill_buffer (HAL::CommandBufferFillBufferOp)
- hal.command_buffer.finalize (HAL::CommandBufferFinalizeOp)
- hal.command_buffer.push_constants (HAL::CommandBufferPushConstantsOp)
- hal.command_buffer.push_descriptor_set (HAL::CommandBufferPushDescriptorSetOp)
- Descriptor set layout ops
- hal.descriptor_set_layout.create (HAL::DescriptorSetLayoutCreateOp)
- hal.descriptor_set_layout.lookup (HAL::DescriptorSetLayoutLookupOp)
- Device ops
- hal.device.allocator (HAL::DeviceAllocatorOp)
- hal.device.query (HAL::DeviceQueryOp)
- hal.device.queue.alloca (HAL::DeviceQueueAllocaOp)
- hal.device.queue.dealloca (HAL::DeviceQueueDeallocaOp)
- hal.device.queue.execute (HAL::DeviceQueueExecuteOp)
- hal.device.queue.flush (HAL::DeviceQueueFlushOp)
- hal.device.queue.read (HAL::DeviceQueueReadOp)
- hal.device.queue.write (HAL::DeviceQueueWriteOp)
- hal.return (HAL::ReturnOp)
- Executable ops
- hal.executable.binary (HAL::ExecutableBinaryOp)
- hal.executable.calculate_workgroups (HAL::ExecutableCalculateWorkgroupsOp)
- hal.executable.condition (HAL::ExecutableConditionOp)
- hal.executable.constant.block (HAL::ExecutableConstantBlockOp)
- hal.executable.constant.load (HAL::ExecutableConstantLoadOp)
- hal.executable.create (HAL::ExecutableCreateOp)
- hal.executable_end (HAL::ExecutableEndOp)
- hal.executable.export (HAL::ExecutableExportOp)
- hal.executable.lookup (HAL::ExecutableLookupOp)
- hal.executable (HAL::ExecutableOp)
- hal.executable.source_end (HAL::ExecutableSourceEndOp)
- hal.executable.source (HAL::ExecutableSourceOp)
- hal.executable.variant_end (HAL::ExecutableVariantEndOp)
- hal.executable.variant (HAL::ExecutableVariantOp)
- Experimental ops
- hal.ex.file.from_memory (HAL::ExFileFromMemoryOp)
- hal.ex.shared_device (HAL::ExSharedDeviceOp)
- Fence ops
- hal.fence.await (HAL::FenceAwaitOp)
- hal.fence.create (HAL::FenceCreateOp)
- hal.fence.fail (HAL::FenceFailOp)
- hal.fence.join (HAL::FenceJoinOp)
- hal.fence.query (HAL::FenceQueryOp)
- hal.fence.signal (HAL::FenceSignalOp)
- Instrument ops
- hal.instrument.memory.load (HAL::InstrumentMemoryLoadOp)
- hal.instrument.memory.store (HAL::InstrumentMemoryStoreOp)
- hal.instrument.print (HAL::InstrumentPrintOp)
- hal.instrument.value (HAL::InstrumentValueOp)
- hal.instrument.workgroup (HAL::InstrumentWorkgroupOp)
- Interface ops
- hal.interface.binding.subspan (HAL::InterfaceBindingSubspanOp)
- hal.interface.constant.load (HAL::InterfaceConstantLoadOp)
- hal.interface.workgroup.count (HAL::InterfaceWorkgroupCountOp)
- hal.interface.workgroup.id (HAL::InterfaceWorkgroupIDOp)
- hal.interface.workgroup.size (HAL::InterfaceWorkgroupSizeOp)
- Pipeline layout ops
- hal.pipeline_layout.create (HAL::PipelineLayoutCreateOp)
- hal.pipeline_layout.lookup (HAL::PipelineLayoutLookupOp)
- Pseudo Ops
- hal.dispatch.extern (HAL::DispatchExternOp)
- hal.tensor.barrier (HAL::TensorBarrierOp)
- hal.tensor.export (HAL::TensorExportOp)
- hal.tensor.import (HAL::TensorImportOp)
- Attribute definition
- AffinityQueueAttr
- CollectiveAttr
- DescriptorSetBindingAttr
- DescriptorSetLayoutAttr
- DescriptorTypeAttr
- DeviceMatchArchitectureAttr
- DeviceMatchExecutableFormatAttr
- DeviceMatchFeatureAttr
- DeviceMatchIDAttr
- DeviceTargetAttr
- ExecutableObjectAttr
- ExecutableObjectsAttr
- ExecutableTargetAttr
- InterfaceBindingAttr
- MatchAllAttr
- MatchAlwaysAttr
- MatchAnyAttr
- PipelineLayoutAttr
- Type constraint definition
- allocator
- buffer
- buffer_view
- collective.channel
- command_buffer
- descriptor_set_layout
- device
- event
- executable
- fence
- buffer
- pipeline_layout
"},{"location":"reference/mlir-dialects/HAL/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/HAL/#allocator-ops","title":"Allocator ops","text":"Ops for !hal.allocator
/ iree_hal_allocator_t
.
"},{"location":"reference/mlir-dialects/HAL/#halallocatorallocate-halallocatorallocateop","title":"hal.allocator.allocate
(HAL::AllocatorAllocateOp)","text":"Empty buffer allocation operation
Syntax:
operation ::= `hal.allocator.allocate` `<` $allocator `:` type($allocator) `>`\n `affinity` `(` $queue_affinity `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` custom<SizeAwareType>(type($result), $result_size)\n attr-dict-with-keyword\n
Allocates a buffer of the given size from the allocator. The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands","title":"Operands:","text":"Operand Description allocator
allocator queue_affinity
64-bit signless integer result_size
index"},{"location":"reference/mlir-dialects/HAL/#results","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#halallocatorimport-halallocatorimportop","title":"hal.allocator.import
(HAL::AllocatorImportOp)","text":"Allocator-supported host buffer import operation
Syntax:
operation ::= `hal.allocator.import` `<` $allocator `:` type($allocator) `>`\n `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n `affinity` `(` $queue_affinity `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` type($did_import) `,` type($result)\n attr-dict-with-keyword\n
Tries importing host memory backed by the given byte buffer into a device accessible !hal.buffer
. The returned buffer may be host-only and not directly usable on devices. If the mapping cannot be completed (such as trying to map the host memory as device-local on devices with discrete memory) then did_import
will indicate that the returned buffer is null.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_1","title":"Operands:","text":"Operand Description allocator
allocator queue_affinity
64-bit signless integer source
a reference counted byte buffer offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#results_1","title":"Results:","text":"Result Description did_import
1-bit signless integer result
buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-ops","title":"Buffer ops","text":"Ops for !hal.buffer
/ iree_hal_buffer_t
.
"},{"location":"reference/mlir-dialects/HAL/#halbufferassert-halbufferassertop","title":"hal.buffer.assert
(HAL::BufferAssertOp)","text":"Buffer compatibility assertion
Syntax:
operation ::= `hal.buffer.assert` `<` $buffer `:` type($buffer) `>`\n `message` `(` $message `)`\n `allocator` `(` $allocator `:` type($allocator) `)`\n `minimum_length` `(` $minimum_length `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n attr-dict-with-keyword\n
Asserts that the buffer is compatible with the given allocator and usage. Program execution will abort as if std.assert
had been used.
This only checks that the buffer can be used and not that it matches the given parameters exactly. Buffers may be from other allocators so long as the allocators are compatible (devices can address each other's memory), the type and usage contain all the requested bits (having more bits is ok), and the length is at least the requested minimum (as padding may be ignored).
"},{"location":"reference/mlir-dialects/HAL/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_2","title":"Operands:","text":"Operand Description buffer
buffer allocator
allocator minimum_length
index"},{"location":"reference/mlir-dialects/HAL/#halbufferlength-halbufferlengthop","title":"hal.buffer.length
(HAL::BufferLengthOp)","text":"Buffer byte length accessor
Syntax:
operation ::= `hal.buffer.length` `<` $buffer `:` type($buffer) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the allocated size of a buffer in bytes. May be less than the underlying buffer allocation if this is a subspan or view into another buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_3","title":"Operands:","text":"Operand Description buffer
buffer"},{"location":"reference/mlir-dialects/HAL/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halbufferload-halbufferloadop","title":"hal.buffer.load
(HAL::BufferLoadOp)","text":"Buffer element load operation
Syntax:
operation ::= `hal.buffer.load` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `]`\n `:` type($result)\n attr-dict-with-keyword\n
Loads a value from a buffer by mapping it.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_4","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index"},{"location":"reference/mlir-dialects/HAL/#results_3","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/HAL/#halbufferstore-halbufferstoreop","title":"hal.buffer.store
(HAL::BufferStoreOp)","text":"Buffer element store operation
Syntax:
operation ::= `hal.buffer.store` `<` $target_buffer `:` type($target_buffer) `>`\n `` `[` $target_offset `]`\n `value` `(` $value `:` type($value) `)`\n attr-dict-with-keyword\n
Stores a value into a buffer by mapping it.
"},{"location":"reference/mlir-dialects/HAL/#operands_5","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type or vector of any type values target_buffer
buffer target_offset
index"},{"location":"reference/mlir-dialects/HAL/#halbuffersubspan-halbuffersubspanop","title":"hal.buffer.subspan
(HAL::BufferSubspanOp)","text":"Buffer subspan operation
Syntax:
operation ::= `hal.buffer.subspan` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a reference to a subspan of the buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SizeAwareOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_6","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#results_4","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-view-ops","title":"Buffer view ops","text":"Ops for !hal.buffer_view
/ iree_hal_buffer_view_t
.
"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewassert-halbufferviewassertop","title":"hal.buffer_view.assert
(HAL::BufferViewAssertOp)","text":"Buffer view contents assertion
Syntax:
operation ::= `hal.buffer_view.assert` `<` $buffer_view `:` type($buffer_view) `>`\n `message` `(` $message `)`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n attr-dict-with-keyword\n
Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if std.assert
had been used.
"},{"location":"reference/mlir-dialects/HAL/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_7","title":"Operands:","text":"Operand Description buffer_view
buffer_view element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewbuffer-halbufferviewbufferop","title":"hal.buffer_view.buffer
(HAL::BufferViewBufferOp)","text":"Buffer view buffer accessor
Syntax:
operation ::= `hal.buffer_view.buffer` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the buffer backing this view's contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_8","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_5","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewcreate-halbufferviewcreateop","title":"hal.buffer_view.create
(HAL::BufferViewCreateOp)","text":"Buffer view reference initializer
Syntax:
operation ::= `hal.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `,` $source_length `]`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_9","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index source_length
index element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_6","title":"Results:","text":"Result Description result
buffer_view"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewdim-halbufferviewdimop","title":"hal.buffer_view.dim
(HAL::BufferViewDimOp)","text":"Buffer view dimension value query
Syntax:
operation ::= `hal.buffer_view.dim` `<` $buffer_view `:` type($buffer_view) `>`\n `` `[` $index `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the value of the given dimension.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_10","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_7","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewelement_type-halbufferviewelementtypeop","title":"hal.buffer_view.element_type
(HAL::BufferViewElementTypeOp)","text":"Buffer view element type query
Syntax:
operation ::= `hal.buffer_view.element_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the element type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_11","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_8","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewencoding_type-halbufferviewencodingtypeop","title":"hal.buffer_view.encoding_type
(HAL::BufferViewEncodingTypeOp)","text":"Buffer view encoding type query
Syntax:
operation ::= `hal.buffer_view.encoding_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the encoding type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_12","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_9","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewrank-halbufferviewrankop","title":"hal.buffer_view.rank
(HAL::BufferViewRankOp)","text":"Buffer view rank query
Syntax:
operation ::= `hal.buffer_view.rank` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the rank of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_13","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_10","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewtrace-halbufferviewtraceop","title":"hal.buffer_view.trace
(HAL::BufferViewTraceOp)","text":"Trace value(s) operation
Syntax:
operation ::= `hal.buffer_view.trace` $key `=`\n $operands `:` type($operands)\n attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. The key is informational only and useful for titling/marking specific sets of buffers for easier searching.
"},{"location":"reference/mlir-dialects/HAL/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_14","title":"Operands:","text":"Operand Description operands
variadic of buffer_view"},{"location":"reference/mlir-dialects/HAL/#channel-ops","title":"Channel ops","text":"Ops for !hal.channel
/ iree_hal_channel_t
.
"},{"location":"reference/mlir-dialects/HAL/#halchannelcreate-halchannelcreateop","title":"hal.channel.create
(HAL::ChannelCreateOp)","text":"Creates a new channel for collective communication
Syntax:
operation ::= `hal.channel.create` `device` `(` $device `:` type($device) `)`\n `affinity` `(` $queue_affinity `)`\n `flags` `(` $flags `)`\n `id` `(` $id `)`\n `group` `(` $group `)`\n `rank` `(` $rank `)`\n `count` `(` $count `)`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a new channel with the given rank associated with the given device queue. Collective operations using this channel must only be submitted on compatible queues.
The group and ID are optional and may be null. A rank or count of -1 can be used to indicate a default inherited from the environment or device configuration.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_15","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer id
a reference counted byte buffer group
a reference counted byte buffer rank
32-bit signless integer count
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_11","title":"Results:","text":"Result Description result
collective.channel"},{"location":"reference/mlir-dialects/HAL/#halchannelrank_and_count-halchannelrankandcountop","title":"hal.channel.rank_and_count
(HAL::ChannelRankAndCountOp)","text":"Returns the rank of the local participant in the group
Syntax:
operation ::= `hal.channel.rank_and_count` `<` $channel `:` type($channel) `>`\n `:` type($rank) `,` type($count)\n attr-dict-with-keyword\n
Returns the rank the channel represents as a participant in a collective group in [0, count)
and the total participant count.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_16","title":"Operands:","text":"Operand Description channel
collective.channel"},{"location":"reference/mlir-dialects/HAL/#results_12","title":"Results:","text":"Result Description rank
32-bit signless integer count
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halchannelsplit-halchannelsplitop","title":"hal.channel.split
(HAL::ChannelSplitOp)","text":"Splits a collective communication channel
Syntax:
operation ::= `hal.channel.split` `<` $channel `:` type($channel) `>`\n `color` `(` $color `)`\n `key` `(` $key `)`\n `flags` `(` $flags `)`\n `:` type($result)\n attr-dict-with-keyword\n
Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_17","title":"Operands:","text":"Operand Description channel
collective.channel color
32-bit signless integer key
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_13","title":"Results:","text":"Result Description result
collective.channel"},{"location":"reference/mlir-dialects/HAL/#command-buffer-ops","title":"Command buffer ops","text":"Ops for !hal.command_buffer
/ iree_hal_command_buffer_t
.
"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferbegin_debug_group-halcommandbufferbegindebuggroupop","title":"hal.command_buffer.begin_debug_group
(HAL::CommandBufferBeginDebugGroupOp)","text":"Pushes a command buffer debug group label
Syntax:
operation ::= `hal.command_buffer.begin_debug_group` `<` $command_buffer `:` type($command_buffer) `>`\n `label` `(` $label `)`\n attr-dict-with-keyword\n
Pushes a new debug group with the given label. All commands between this and a mandatory matching call to hal.command_buffer.end_debug_group
will be grouped together with the given label.
"},{"location":"reference/mlir-dialects/HAL/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription label
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_18","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercollective-halcommandbuffercollectiveop","title":"hal.command_buffer.collective
(HAL::CommandBufferCollectiveOp)","text":"Command buffer collective dispatch recording operation
Syntax:
operation ::= `hal.command_buffer.collective` `<` $command_buffer `:` type($command_buffer) `>`\n `channel` `(` $channel `:` type($channel) `)`\n `op` `(` $op `)`\n (`param` `(` $param^ `:` type($param) `)`)?\n (`send` `(` $send_buffer^ `:` type($send_buffer) `)`\n `` `[` $send_offset `,` $send_length `]`)?\n (`recv` `(` $recv_buffer^ `:` type($recv_buffer) `)`\n `` `[` $recv_offset `,` $recv_length `]`)?\n `count` `(` $element_count `)`\n attr-dict-with-keyword\n
Dispatches a collective operation defined by op using the given buffers.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/HAL/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription op
::mlir::iree_compiler::IREE::HAL::CollectiveAttrcollective operation and specification"},{"location":"reference/mlir-dialects/HAL/#operands_19","title":"Operands:","text":"Operand Description command_buffer
command_buffer channel
collective.channel element_count
index param
32-bit signless integer send_buffer
buffer send_offset
index send_length
index recv_buffer
buffer recv_offset
index recv_length
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercopy_buffer-halcommandbuffercopybufferop","title":"hal.command_buffer.copy_buffer
(HAL::CommandBufferCopyBufferOp)","text":"Command buffer buffer copy recording operation
Syntax:
operation ::= `hal.command_buffer.copy_buffer` `<` $command_buffer `:` type($command_buffer) `>`\n `source` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `]`\n `target` `(` $target_buffer `:` type($target_buffer) `)`\n `` `[` $target_offset `]`\n `length` `(` $length `)`\n attr-dict-with-keyword\n
Copies a range of one buffer to another.
"},{"location":"reference/mlir-dialects/HAL/#operands_20","title":"Operands:","text":"Operand Description command_buffer
command_buffer source_buffer
buffer source_offset
index target_buffer
buffer target_offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercreate-halcommandbuffercreateop","title":"hal.command_buffer.create
(HAL::CommandBufferCreateOp)","text":"Command buffer allocation operation
Syntax:
operation ::= `hal.command_buffer.create` `device` `(` $device `:` type($device) `)`\n `mode` `(` $modes `)`\n `categories` `(` $command_categories `)`\n (`bindings` `(` $binding_capacity^ `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Returns a command buffer from the device pool ready to begin recording.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription modes
mlir::iree_compiler::IREE::HAL::CommandBufferModeBitfieldAttrvalid CommandBufferMode command_categories
mlir::iree_compiler::IREE::HAL::CommandCategoryBitfieldAttrvalid CommandCategory"},{"location":"reference/mlir-dialects/HAL/#operands_21","title":"Operands:","text":"Operand Description device
device binding_capacity
index"},{"location":"reference/mlir-dialects/HAL/#results_14","title":"Results:","text":"Result Description result
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdevice-halcommandbufferdeviceop","title":"hal.command_buffer.device
(HAL::CommandBufferDeviceOp)","text":"Command buffer device query operation
Syntax:
operation ::= `hal.command_buffer.device` `<` $command_buffer `:` type($command_buffer) `>`\n `:` type($device)\n attr-dict-with-keyword\n
Used during conversion to access the device used to create a command buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_22","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#results_15","title":"Results:","text":"Result Description device
device"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirect-halcommandbufferdispatchindirectop","title":"hal.command_buffer.dispatch.indirect
(HAL::CommandBufferDispatchIndirectOp)","text":"Command buffer indirect dispatch recording operation
Syntax:
operation ::= `hal.command_buffer.dispatch.indirect` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $executable `:` type($executable) `)`\n `` `[` $entry_point `]`\n `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n `` `[` $workgroups_offset `]`\n attr-dict-with-keyword\n
Dispatches an execution request with the dispatch parameters loaded from the given buffer.
"},{"location":"reference/mlir-dialects/HAL/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_23","title":"Operands:","text":"Operand Description command_buffer
command_buffer executable
executable workgroups_buffer
buffer workgroups_offset
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirectsymbol-halcommandbufferdispatchindirectsymbolop","title":"hal.command_buffer.dispatch.indirect.symbol
(HAL::CommandBufferDispatchIndirectSymbolOp)","text":"Command buffer indirect dispatch recording operation, using symbolref
Syntax:
operation ::= `hal.command_buffer.dispatch.indirect.symbol` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $entry_point `)`\n `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n `` `[` $workgroups_offset `]`\n attr-dict-with-keyword\n
Dispatches an execution request with the dispatch parameters loaded from the given buffer, using a nested symbol reference to the entry point.
hal.command_buffer.dispatch.indirect.symbol %cmd, @executable::@target::@entry,\n workgroups = %buffer[%offset]\n
"},{"location":"reference/mlir-dialects/HAL/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_24","title":"Operands:","text":"Operand Description command_buffer
command_buffer workgroups_buffer
buffer workgroups_offset
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatch-halcommandbufferdispatchop","title":"hal.command_buffer.dispatch
(HAL::CommandBufferDispatchOp)","text":"Command buffer dispatch recording operation
Syntax:
operation ::= `hal.command_buffer.dispatch` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $executable `:` type($executable) `)`\n `` `[` $entry_point `]`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n attr-dict-with-keyword\n
Dispatches an execution request.
"},{"location":"reference/mlir-dialects/HAL/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_25","title":"Operands:","text":"Operand Description command_buffer
command_buffer executable
executable workgroup_x
index workgroup_y
index workgroup_z
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchsymbol-halcommandbufferdispatchsymbolop","title":"hal.command_buffer.dispatch.symbol
(HAL::CommandBufferDispatchSymbolOp)","text":"Command buffer dispatch recording operation, using symbolref
Syntax:
operation ::= `hal.command_buffer.dispatch.symbol` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $entry_point `)`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n attr-dict-with-keyword\n
Dispatches an execution request, using a nested symbol reference to the entry point.
"},{"location":"reference/mlir-dialects/HAL/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_26","title":"Operands:","text":"Operand Description command_buffer
command_buffer workgroup_x
index workgroup_y
index workgroup_z
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferend_debug_group-halcommandbufferenddebuggroupop","title":"hal.command_buffer.end_debug_group
(HAL::CommandBufferEndDebugGroupOp)","text":"Pops a command buffer debug group label
Syntax:
operation ::= `hal.command_buffer.end_debug_group` `<` $command_buffer `:` type($command_buffer) `>`\n attr-dict-with-keyword\n
Pops a debug group from the stack.
"},{"location":"reference/mlir-dialects/HAL/#operands_27","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferexecution_barrier-halcommandbufferexecutionbarrierop","title":"hal.command_buffer.execution_barrier
(HAL::CommandBufferExecutionBarrierOp)","text":"Command buffer execution barrier recording operation
Syntax:
operation ::= `hal.command_buffer.execution_barrier` `<` $command_buffer `:` type($command_buffer) `>`\n `source` `(` $source_stage_mask `)`\n `target` `(` $target_stage_mask `)`\n `flags` `(` $flags `)`\n attr-dict-with-keyword\n
Defines an execution dependency between all commands recorded before the barrier and all commands recorded after the barrier. Only the stages provided will be affected.
"},{"location":"reference/mlir-dialects/HAL/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription source_stage_mask
mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage target_stage_mask
mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage flags
mlir::iree_compiler::IREE::HAL::ExecutionBarrierFlagBitfieldAttrvalid ExecutionBarrierFlag"},{"location":"reference/mlir-dialects/HAL/#operands_28","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfill_buffer-halcommandbufferfillbufferop","title":"hal.command_buffer.fill_buffer
(HAL::CommandBufferFillBufferOp)","text":"Command buffer buffer fill recording operation
Syntax:
operation ::= `hal.command_buffer.fill_buffer` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $target_buffer `:` type($target_buffer) `)`\n `` `[` $target_offset `,` $length `]`\n `pattern` `(` $pattern `:` type($pattern) `)`\n attr-dict-with-keyword\n
Fills the target buffer with the given repeating value.
"},{"location":"reference/mlir-dialects/HAL/#operands_29","title":"Operands:","text":"Operand Description command_buffer
command_buffer target_buffer
buffer target_offset
index length
index pattern
8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfinalize-halcommandbufferfinalizeop","title":"hal.command_buffer.finalize
(HAL::CommandBufferFinalizeOp)","text":"Finalizes command buffer recording
Syntax:
operation ::= `hal.command_buffer.finalize` `<` $command_buffer `:` type($command_buffer) `>`\n attr-dict-with-keyword\n
Ends recording into the command buffer and prepares it for submission. No more commands may be recorded into the command buffer.
"},{"location":"reference/mlir-dialects/HAL/#operands_30","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_constants-halcommandbufferpushconstantsop","title":"hal.command_buffer.push_constants
(HAL::CommandBufferPushConstantsOp)","text":"Command buffer push constants operation
Syntax:
operation ::= `hal.command_buffer.push_constants` `<` $command_buffer `:` type($command_buffer) `>`\n `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n `offset` `(` $offset `)`\n `values` `(` `[` $values `]` `)`\n `:` type($values)\n attr-dict-with-keyword\n
Pushes an inline set of constants that can be accessed by subsequent dispatches using a compatible pipeline layout.
Push constants are always 4-byte values and treated as opaque, meaning that they may be bit-casted floats, bit-packed booleans, etc.
"},{"location":"reference/mlir-dialects/HAL/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription offset
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_31","title":"Operands:","text":"Operand Description command_buffer
command_buffer pipeline_layout
pipeline_layout values
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_descriptor_set-halcommandbufferpushdescriptorsetop","title":"hal.command_buffer.push_descriptor_set
(HAL::CommandBufferPushDescriptorSetOp)","text":"Command buffer descriptor set push binding operation
Syntax:
operation ::= `hal.command_buffer.push_descriptor_set` `<` $command_buffer `:` type($command_buffer) `>`\n `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n `` `[` $set `]`\n `bindings` `(` `[`\n custom<DescriptorSetBindings>($binding_ordinals,\n $binding_buffers,\n type($binding_buffers),\n $binding_offsets,\n $binding_lengths)\n `]` `)`\n attr-dict-with-keyword\n
Pushes an inline-defined descriptor set to the command buffer. The provided buffers may either be HAL buffers or indirect references into the command buffer binding table.
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/HAL/#operands_32","title":"Operands:","text":"Operand Description command_buffer
command_buffer pipeline_layout
pipeline_layout set
index binding_ordinals
variadic of index binding_buffers
variadic of index or buffer binding_offsets
variadic of index binding_lengths
variadic of index"},{"location":"reference/mlir-dialects/HAL/#descriptor-set-layout-ops","title":"Descriptor set layout ops","text":"Ops for !hal.descriptor_set_layout
/ iree_hal_descriptor_set_layout_t
.
"},{"location":"reference/mlir-dialects/HAL/#haldescriptor_set_layoutcreate-haldescriptorsetlayoutcreateop","title":"hal.descriptor_set_layout.create
(HAL::DescriptorSetLayoutCreateOp)","text":"Creates a descriptor set layout
Syntax:
operation ::= `hal.descriptor_set_layout.create` `device` `(` $device `:` type($device) `)`\n `flags` `(` $flags `)`\n `bindings` `(` $bindings `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a descriptor set layout that defines the bindings used within a set. The same descriptor set layout may be shared with many different executable layouts and by doing so some runtime binding overhead when switching between executables that use the same set layouts can be reduced.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlagsAttrvalid DescriptorSetLayout flags bindings
::mlir::ArrayAttrHAL descriptor set layout binding array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_33","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_16","title":"Results:","text":"Result Description result
descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#haldescriptor_set_layoutlookup-haldescriptorsetlayoutlookupop","title":"hal.descriptor_set_layout.lookup
(HAL::DescriptorSetLayoutLookupOp)","text":"Descriptor set layout cache lookup pseudo-op
Syntax:
operation ::= `hal.descriptor_set_layout.lookup` `device` `(` $device `:` type($device) `)`\n `flags` `(` $flags `)`\n `bindings` `(` $bindings `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized descriptor set layout.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlagsAttrvalid DescriptorSetLayout flags bindings
::mlir::ArrayAttrHAL descriptor set layout binding array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_34","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_17","title":"Results:","text":"Result Description result
descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#device-ops","title":"Device ops","text":"Ops for !hal.device
/ iree_hal_device_t
.
"},{"location":"reference/mlir-dialects/HAL/#haldeviceallocator-haldeviceallocatorop","title":"hal.device.allocator
(HAL::DeviceAllocatorOp)","text":"Device allocator accessor operation
Syntax:
operation ::= `hal.device.allocator` `<` $device `:` type($device) `>` `:` type($result) attr-dict-with-keyword\n
Returns the allocator that can be used to allocate buffers compatible with the device.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_35","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_18","title":"Results:","text":"Result Description result
allocator"},{"location":"reference/mlir-dialects/HAL/#haldevicequery-haldevicequeryop","title":"hal.device.query
(HAL::DeviceQueryOp)","text":"Returns a runtime configuration parameter from the device
Syntax:
operation ::= `hal.device.query` `<` $device `:` type($device) `>`\n `key` `(` $category `:` `` `:` $key `)`\n `:` type($ok) `,` type($value)\n (`=` $default_value^)?\n attr-dict-with-keyword\n
Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and if it was the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.
This is roughly equivalent to the sysconf
linux syscall (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent. If there is a HAL match attribute (#hal.device.match.*
) or op (hal.device.match.*
) prefer to use that in order to get compile-time propagation when the target is specified and elide the runtime query and get compile-time verification when a runtime query is required.
Users of the op must check the ok
result before using the value as what set of keys is available may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. Keys should be prefixed with ex.
when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.
Well-known keys:
-
hal.executable.format :: {some format} Returns 1 if the given format is supported by the device loader.
-
hal.device :: concurrency The maximum concurrently executable submissions, mapping roughly to the queue count. The actual concurrency available may be less than this based on dynamic runtime parameters such as power/thermal modes, quota limits, or user choice.
-
hal.dispatch :: concurrency The maximum concurrently executable workgroups for a particular dispatch. The actual concurrency available may be less depending on device state.
Traits: AlwaysSpeculatableImplTrait, HAL_DeviceQuery
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription category
::mlir::StringAttrstring attribute key
::mlir::StringAttrstring attribute default_value
::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/HAL/#operands_36","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_19","title":"Results:","text":"Result Description ok
1-bit signless integer value
any type"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuealloca-haldevicequeueallocaop","title":"hal.device.queue.alloca
(HAL::DeviceQueueAllocaOp)","text":"Allocates a queue-ordered transient buffer
Syntax:
operation ::= `hal.device.queue.alloca` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `pool` `(` $pool `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` custom<SizeAwareType>(type($result), $result_size)\n attr-dict-with-keyword\n
Returns a queue-ordered transient buffer that will be available for use when the signal fence is reached. The allocation will not be made until the wait fence has been reached.
The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.
The buffer handle will remain live so long as there are retainers but the contents are undefined before the allocation signal fence has been signaled and after the deallocation wait fence has been reached.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_37","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence pool
64-bit signless integer result_size
index"},{"location":"reference/mlir-dialects/HAL/#results_20","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuedealloca-haldevicequeuedeallocaop","title":"hal.device.queue.dealloca
(HAL::DeviceQueueDeallocaOp)","text":"Deallocates a queue-ordered transient buffer
Syntax:
operation ::= `hal.device.queue.dealloca` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `buffer` `(` $buffer `:` type($buffer) `)`\n attr-dict-with-keyword\n
Deallocates a queue-ordered transient buffer. The deallocation will not be made until the wait fence has been reached and once the storage is available for reuse the signal fence will be signaled.
After deallocation the contents of the buffer may still be accessible but will have undefined contents as other operations reuse the memory.
"},{"location":"reference/mlir-dialects/HAL/#operands_38","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence buffer
buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueexecute-haldevicequeueexecuteop","title":"hal.device.queue.execute
(HAL::DeviceQueueExecuteOp)","text":"Enqueues command buffer execution
Syntax:
operation ::= `hal.device.queue.execute` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n (`commands` `(` `[` $command_buffers^ `]` `)`)?\n attr-dict-with-keyword\n
Executes one or more command buffers on a device queue. The command buffers are executed in order as if they were recorded as one. No commands will execute until the wait fence has been reached and the signal fence will be signaled when all commands have completed.
"},{"location":"reference/mlir-dialects/HAL/#operands_39","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence command_buffers
variadic of command_buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueflush-haldevicequeueflushop","title":"hal.device.queue.flush
(HAL::DeviceQueueFlushOp)","text":"Flushes locally-pending submissions to the queue
Syntax:
operation ::= `hal.device.queue.flush` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n attr-dict-with-keyword\n
Flushes any locally-pending submissions in the queue. When submitting many queue operations this can be used to eagerly flush earlier submissions while later ones are still being constructed. This may be a no-op.
"},{"location":"reference/mlir-dialects/HAL/#operands_40","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueread-haldevicequeuereadop","title":"hal.device.queue.read
(HAL::DeviceQueueReadOp)","text":"Reads a segment from a file into a device buffer
Syntax:
operation ::= `hal.device.queue.read` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `source` `(` $source_file `:` type($source_file) `)`\n `` `[` $source_offset `]`\n `target` `(` $target_buffer `:` type($target_buffer) `)`\n `` `[` $target_offset `]`\n `length` `(` $length `)`\n `flags` `(` $flags `)`\n attr-dict-with-keyword\n
Enqueues a file read operation that streams a segment of the source file defined by the source offset and length into the target HAL buffer at the specified target offset. The queue affinity should be set to where the target buffer will be consumed. The source file must have read permission and the target buffer must have transfer-target usage. Read failure will result in propagated semaphore failure or device loss.
"},{"location":"reference/mlir-dialects/HAL/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_41","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_file
buffer source_offset
64-bit signless integer target_buffer
buffer target_offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuewrite-haldevicequeuewriteop","title":"hal.device.queue.write
(HAL::DeviceQueueWriteOp)","text":"Writes a segment from a device buffer into a file
Syntax:
operation ::= `hal.device.queue.write` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `source` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `]`\n `target` `(` $target_file `:` type($target_file) `)`\n `` `[` $target_offset `]`\n `length` `(` $length `)`\n `flags` `(` $flags `)`\n attr-dict-with-keyword\n
Enqueues a file write operation that streams a segment of the source HAL buffer defined by the source offset and length into the target file at the specified target offset. The queue affinity should be set to where the source buffer was produced. The source buffer must have transfer-source usage and the target file must have write permission. Write failure will result in propagated semaphore failure or device loss.
"},{"location":"reference/mlir-dialects/HAL/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_42","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_buffer
buffer source_offset
index target_file
buffer target_offset
64-bit signless integer length
index"},{"location":"reference/mlir-dialects/HAL/#halreturn-halreturnop","title":"hal.return
(HAL::ReturnOp)","text":"Return from a hal.* region
Syntax:
operation ::= `hal.return` ($operands^ `:` type($operands))? attr-dict\n
Returns the given values from the region and back to the host code.
Traits: Terminator
"},{"location":"reference/mlir-dialects/HAL/#operands_43","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/HAL/#executable-ops","title":"Executable ops","text":"Ops for !hal.executable
/ iree_hal_executable_t
.
"},{"location":"reference/mlir-dialects/HAL/#halexecutablebinary-halexecutablebinaryop","title":"hal.executable.binary
(HAL::ExecutableBinaryOp)","text":"Compiled executable binary data
Syntax:
operation ::= `hal.executable.binary` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n
A compiled executable binary with an optional nested module containing the IR prior to serialization (for debugging).
Traits: HasParent
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute format
::mlir::StringAttrstring attribute data
::mlir::DenseIntElementsAttr8-bit signless integer elements attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablecalculate_workgroups-halexecutablecalculateworkgroupsop","title":"hal.executable.calculate_workgroups
(HAL::ExecutableCalculateWorkgroupsOp)","text":"Calculates workgroup count from workload for an exported function
Syntax:
operation ::= `hal.executable.calculate_workgroups` `device` `(` $device `:` type($device) `)`\n `target` `(` $entry_point `)`\n (`workload` `(` `[` $workload^ `]` `)`)?\n `:` type($workgroup_x) `,` type($workgroup_y) `,` type($workgroup_z)\n attr-dict-with-keyword\n
Calculates the workgroup count (grid XYZ) based on the given workload using the workgroup count calculation region of the target hal.executable.export
op.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_44","title":"Operands:","text":"Operand Description device
device workload
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_21","title":"Results:","text":"Result Description workgroup_x
index workgroup_y
index workgroup_z
index"},{"location":"reference/mlir-dialects/HAL/#halexecutablecondition-halexecutableconditionop","title":"hal.executable.condition
(HAL::ExecutableConditionOp)","text":"Host code to determine if the executable is enabled
Variants are selected based on their target and this optional condition op that returns true if the variant is valid for use on the provided runtime !hal.device
. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.
Traits: IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantblock-halexecutableconstantblockop","title":"hal.executable.constant.block
(HAL::ExecutableConstantBlockOp)","text":"Executable constant block initializer
Initializes one or more constants in the executable constant block by returning one value per identified constant. Each constant block is evaluated on the host prior to instantiating the executable for a given device and allows for the executable to be specialized based on device capabilities and limits.
The keys specified are unique per variant and will be deduplicated across multiple constant blocks when present. They are only used during lowering and will not survive to runtime so they need only have descriptive enough names to avoid collisions and represent the semantics of the value.
Constant values can be loaded in the device code with the hal.executable.constant.load
op:
hal.executable.variant public @target {\n hal.executable.constant.block(%device: !hal.device) -> (i32, i32) as (\"foo\", \"bar\") {\n %0 = hal.device.query<%device> key(\"some.device.prop\")...\n %1 = hal.device.query<%device> key(\"another.device.prop\")...\n hal.return %0, %1 : i32, i32\n }\n builtin.module {\n func @dispatch0() {\n %0 = hal.executable.constant.load \"foo\" : i32\n %1 = hal.executable.constant.load \"bar\" : i32\n return\n }\n }\n}\n
Each target backend will implement the constant initialization and access in a way compatible with its execution model. Examples: - CPU: read-only buffer initialized on load and passed to each dispatch - CUDA: read-only buffer initialized on load and passed to each dispatch - SPIR-V: specialization constants - Metal: function constants - WebGPU: pipeline-overridable constants
Traits: HasParent, IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type keys
::mlir::ArrayAttrarray attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantload-halexecutableconstantloadop","title":"hal.executable.constant.load
(HAL::ExecutableConstantLoadOp)","text":"Loads a constant value from the executable constant block
Syntax:
operation ::= `hal.executable.constant.load` $key attr-dict `:` type($result)\n
Loads a scalar constant value from the static executable constant block. The value provided by a constant block with the given key will be loaded and bitcast (possibly with truncation or zero-extension) to the result type.
Note that backends are allowed to implement their own mechanisms for referencing constant block values and this is provided only as a default for those not needing special behavior.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#results_22","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halexecutablecreate-halexecutablecreateop","title":"hal.executable.create
(HAL::ExecutableCreateOp)","text":"Creates an executable
Syntax:
operation ::= `hal.executable.create` `device` `(` $device `:` type($device) `)`\n `target` `(` $executable_target `)`\n `layouts` `(` `[` $layouts `]` `)`\n (`constants` `(` `[` $constants^ `]` `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Creates a target-dependent executable cached on the provided device. Entry points contained within the executable can be dispatched using the resulting executable handle.
Depending on the driver creation may take a non-trivial amount of time (such as when JITing/etc). As the cache is internally synchronized callers can issue preparation requests from multiple threads - even for the same executables - and calls will block until preparation completes.
Optional constants provide for specialization of the executable based on runtime-derived parameters.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription executable_target
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_45","title":"Operands:","text":"Operand Description device
device layouts
variadic of pipeline_layout constants
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_23","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable_end-halexecutableendop","title":"hal.executable_end
(HAL::ExecutableEndOp)","text":"Terminator pseudo-op for the executable op
Syntax:
operation ::= `hal.executable_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/HAL/#halexecutableexport-halexecutableexportop","title":"hal.executable.export
(HAL::ExecutableExportOp)","text":"
Executable entry point declaration
An entry point exported by the executable with statically-available information describing the IO interface it uses and other dispatch metadata.
The workgroup_count
region represents the computation that returns the number of workgroups to use in the 3D grid dispatch. The arguments to the region represents the workload as captured by each dispatch. It returns the number of workgroups along x, y, and z.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrsize_t layout
::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification workgroup_size
::mlir::ArrayAttrindex array attribute subgroup_size
::mlir::IntegerAttrsize_t workgroup_local_memory
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablelookup-halexecutablelookupop","title":"hal.executable.lookup
(HAL::ExecutableLookupOp)","text":"Executable cache lookup pseudo-op
Syntax:
operation ::= `hal.executable.lookup` `device` `(` $device `:` type($device) `)`\n `executable` `(` $executable `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription executable
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_46","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_24","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable-halexecutableop","title":"hal.executable
(HAL::ExecutableOp)","text":"Target-specific executable module
Syntax:
operation ::= `hal.executable` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
An executable module representing a target-specific compiled kernel/shader/etc.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource_end-halexecutablesourceendop","title":"hal.executable.source_end
(HAL::ExecutableSourceEndOp)","text":"Terminator pseudo-op for the executable source op
Syntax:
operation ::= `hal.executable.source_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource-halexecutablesourceop","title":"hal.executable.source
(HAL::ExecutableSourceOp)","text":"
Generic source contents of an executable op
Syntax:
operation ::= `hal.executable.source` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n ``\n $body\n
This is an unspecialized source representation of an executable module without an assigned target. This is useful for hand-authoring executables prior to device specification.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute objects
::mlir::iree_compiler::IREE::HAL::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant_end-halexecutablevariantendop","title":"hal.executable.variant_end
(HAL::ExecutableVariantEndOp)","text":"Terminator pseudo-op for the executable variant op
Syntax:
operation ::= `hal.executable.variant_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant-halexecutablevariantop","title":"hal.executable.variant
(HAL::ExecutableVariantOp)","text":"
Target-specific variant of an executable op
Syntax:
operation ::= `hal.executable.variant` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n `target` `(` $target `)`\n (`objects` `(` $objects^ `)` )?\n attr-dict-with-keyword\n $body\n
The target IR for the executable. This can be preserved for debugging but is usually removed during transformation.
Variants are selected based on their target and an optional condition op that returns true if the variant is valid for use on the provided runtime !hal.device
. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.
Traits: HasParent, IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute target
::mlir::iree_compiler::IREE::HAL::ExecutableTargetAttrgeneric executable target specification objects
::mlir::ArrayAttrHAL executable object references"},{"location":"reference/mlir-dialects/HAL/#experimental-ops","title":"Experimental ops","text":"Temporary hack ops expected to be removed in the future.
"},{"location":"reference/mlir-dialects/HAL/#halexfilefrom_memory-halexfilefrommemoryop","title":"hal.ex.file.from_memory
(HAL::ExFileFromMemoryOp)","text":"Creates a file mapped into a byte range of a host buffer
Syntax:
operation ::= `hal.ex.file.from_memory` `device` `(` $device `:` type($device) `)`\n `affinity` `(` $queue_affinity `)`\n `access` `(` $access `)`\n `buffer` `(` $buffer `:` type($buffer) `)`\n `` `[` $offset `for` $length `]`\n `flags` `(` $flags `)`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a file handle that is backed by the given buffer
contents. Behavior is undefined if the buffer contents change while the accesses are in-flight.
Experimental as the exact interface for getting files from module contents still needs iteration. Most hardware APIs require a file descriptor or native platform handle but here we only have host pointers. When memory-mapped some systems allow for retrieval of the platform handle from a virtual address (GetMappedFileNameA/posix_mem_offset) but the APIs are sketchy and likely slow. Instead we should probably have a way to query for a file handle derived from the calling module by stack-walking and asking the VM module for its handle. Until we can figure this out this method will be marked experimental.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription access
mlir::iree_compiler::IREE::HAL::MemoryAccessBitfieldAttrvalid MemoryAccess"},{"location":"reference/mlir-dialects/HAL/#operands_47","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer buffer
a reference counted byte buffer offset
index length
index flags
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_25","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#halexshared_device-halexshareddeviceop","title":"hal.ex.shared_device
(HAL::ExSharedDeviceOp)","text":"Syntax:
operation ::= `hal.ex.shared_device` attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#results_26","title":"Results:","text":"Result Description result
device"},{"location":"reference/mlir-dialects/HAL/#fence-ops","title":"Fence ops","text":"Ops for !hal.fence
/ iree_hal_fence_t
.
"},{"location":"reference/mlir-dialects/HAL/#halfenceawait-halfenceawaitop","title":"hal.fence.await
(HAL::FenceAwaitOp)","text":"Asynchronous fence wait operation
Syntax:
operation ::= `hal.fence.await` `until` `(` `[` $fences `]` `)`\n `timeout_millis` `(` $timeout_millis `)`\n `:` type($status)\n attr-dict-with-keyword\n
Yields the caller until all fences are reached. Returns the status
of the fence after the wait, with a non-zero value indicating failure.
Traits: Util_YieldPoint
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#operands_48","title":"Operands:","text":"Operand Description timeout_millis
32-bit signless integer fences
variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_27","title":"Results:","text":"Result Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencecreate-halfencecreateop","title":"hal.fence.create
(HAL::FenceCreateOp)","text":"Creates an unsignaled fence
Syntax:
operation ::= `hal.fence.create` `device` `(` $device `:` type($device) `)`\n `flags` `(` $flags `)`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a fence that defines a point in time. By default fences will remain unsignaled unless they are explicitly signaled with hal.fence.signal
or asynchronously signaled by the device by passing them as an operand to queue submission ops.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/HAL/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
mlir::iree_compiler::IREE::HAL::FenceFlagBitfieldAttrvalid FenceFlag"},{"location":"reference/mlir-dialects/HAL/#operands_49","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_28","title":"Results:","text":"Result Description result
fence"},{"location":"reference/mlir-dialects/HAL/#halfencefail-halfencefailop","title":"hal.fence.fail
(HAL::FenceFailOp)","text":"Fence failure operation
Syntax:
operation ::= `hal.fence.fail` `<` $fence `:` type($fence) `>`\n `status` `(` $status `)`\n attr-dict-with-keyword\n
Signals the fence with a failure. The status
will be returned from each timepoint semaphore's hal.semaphore.query
and hal.semaphore.signal
for the lifetime of each semaphore.
"},{"location":"reference/mlir-dialects/HAL/#operands_50","title":"Operands:","text":"Operand Description fence
fence status
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencejoin-halfencejoinop","title":"hal.fence.join
(HAL::FenceJoinOp)","text":"Creates a fence from the given timepoints
Syntax:
operation ::= `hal.fence.join` `at` `(` `[` $fences `]` `)`\n `->` type($result)\n attr-dict-with-keyword\n
Returns a fence that joins the input fences as a wait-all operation.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#operands_51","title":"Operands:","text":"Operand Description fences
variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_29","title":"Results:","text":"Result Description result
fence"},{"location":"reference/mlir-dialects/HAL/#halfencequery-halfencequeryop","title":"hal.fence.query
(HAL::FenceQueryOp)","text":"Fence query operation
Syntax:
operation ::= `hal.fence.query` `<` $fence `:` type($fence) `>`\n `:` type($status)\n attr-dict-with-keyword\n
Queries whether the fence has been reached and its status. Returns OK if the fence has been signaled successfully, DEFERRED if it is unsignaled, and otherwise an error indicating the failure.
"},{"location":"reference/mlir-dialects/HAL/#operands_52","title":"Operands:","text":"Operand Description fence
fence"},{"location":"reference/mlir-dialects/HAL/#results_30","title":"Results:","text":"Result Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencesignal-halfencesignalop","title":"hal.fence.signal
(HAL::FenceSignalOp)","text":"Fence signal operation
Syntax:
operation ::= `hal.fence.signal` `<` $fence `:` type($fence) `>`\n attr-dict-with-keyword\n
Signals the fence to indicate that the timepoints contained have been reached. Waiting work may begin immediately.
"},{"location":"reference/mlir-dialects/HAL/#operands_53","title":"Operands:","text":"Operand Description fence
fence"},{"location":"reference/mlir-dialects/HAL/#instrument-ops","title":"Instrument ops","text":"Ops for !hal.instrument.*
.
"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemoryload-halinstrumentmemoryloadop","title":"hal.instrument.memory.load
(HAL::InstrumentMemoryLoadOp)","text":"Emits a memory load instrumentation event
Syntax:
operation ::= `hal.instrument.memory.load` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $base `[` $indices `]` `,` $loadValue\n attr-dict `:` type($base) `,` type($result)\n
Emits a workgroup-specific memory load event indicating that a number of bytes from the given resolved pointer have been loaded by the workgroup.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_54","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index loadValue
any type base
memref of any type values indices
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_31","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemorystore-halinstrumentmemorystoreop","title":"hal.instrument.memory.store
(HAL::InstrumentMemoryStoreOp)","text":"Emits a memory store instrumentation event
Syntax:
operation ::= `hal.instrument.memory.store` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $base `[` $indices `]` `,` $storeValue\n attr-dict `:` type($base) `,` type($result)\n
Emits a workgroup-specific memory store event indicating that a number of bytes have been stored to the given resolved pointer by the workgroup.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_55","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index storeValue
any type base
memref of any type values indices
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_32","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentprint-halinstrumentprintop","title":"hal.instrument.print
(HAL::InstrumentPrintOp)","text":"Emits a human-readable printf-style string event
Syntax:
operation ::= `hal.instrument.print` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $format (`*` `(` $values^ `:` type($values) `)`)?\n attr-dict\n
Formats a string using a limited subset of printf format specifiers and the provided values and then emits an iree_instrument_dispatch_print_t
event. Final formatted string lengths may be limited to as much as 1024 characters and should be kept as small as possible to avoid easily exceeding the instrumentation storage buffers with redundant strings.
"},{"location":"reference/mlir-dialects/HAL/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription format
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_56","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index values
variadic of any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentvalue-halinstrumentvalueop","title":"hal.instrument.value
(HAL::InstrumentValueOp)","text":"Emits a scalar value instrumentation event
Syntax:
operation ::= `hal.instrument.value` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $ordinal `=` $operand attr-dict `:` type($operand)\n
Emits a workgroup-specific typed value with the given workgroup-relative ordinal.
This op will be preserved even if the output is not used as it is only for debugging purposes.
"},{"location":"reference/mlir-dialects/HAL/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription ordinal
::mlir::IntegerAttr8-bit integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_57","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index operand
any type"},{"location":"reference/mlir-dialects/HAL/#results_33","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentworkgroup-halinstrumentworkgroupop","title":"hal.instrument.workgroup
(HAL::InstrumentWorkgroupOp)","text":"Emits a dispatch workgroup instrumentation event
Syntax:
operation ::= `hal.instrument.workgroup` `` `[` $buffer `:` type($buffer) `]`\n `dispatch` `(` $dispatchId `)`\n attr-dict `:` type($workgroupKey)\n
Emits an iree_instrument_dispatch_workgroup_t
event into the instrumentation stream. The workgroup event identifies the unique dispatch, its workgroup count, and the ID of the emitting workgroup within the dispatch. Optionally targets that support querying the processor ID executing the workgroup can attach that information for tracking purposes.
On targets such as CPUs where entire workgroups execute as atomic units only one workgroup event should be emitted. On targets such as GPUs where there may be multiple invocations executing as part of a single workgroup only the first invocation within the workgroup should emit the workgroup event (by checking if the LocalInvocationIndex or threadIdx == 0, etc).
The resulting workgroup key is used by subsequent workgroup-specific instrumentation events.
"},{"location":"reference/mlir-dialects/HAL/#operands_58","title":"Operands:","text":"Operand Description buffer
memref of any type values dispatchId
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_34","title":"Results:","text":"Result Description workgroupKey
index"},{"location":"reference/mlir-dialects/HAL/#interface-ops","title":"Interface ops","text":"Ops for !hal.interface.*
.
"},{"location":"reference/mlir-dialects/HAL/#halinterfacebindingsubspan-halinterfacebindingsubspanop","title":"hal.interface.binding.subspan
(HAL::InterfaceBindingSubspanOp)","text":"Returns an alias to a subspan of interface binding data
Syntax:
operation ::= `hal.interface.binding.subspan` `set` `(` $set `)`\n `binding` `(` $binding `)`\n `type` `(` custom<DescriptorType>($descriptor_type) `)`\n (`alignment` `(` $alignment^ `)`)?\n (`offset` `(` $byte_offset^ `)`)?\n (`flags` `(` $descriptor_flags^ `)`)?\n attr-dict `:` type($result) (`{` $dynamic_dims^ `}`)?\n
Returns a subspan of an interface binding storage buffer in a generic type. The exact shape, type, and alignment of the returned type are defined by the result type (tensor, memref, etc).
An optional alignment indicates the byte alignment of the base binding resource. Note that the byte offset is added to the base and the alignment will be the minimum of the two.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription set
::mlir::IntegerAttrindex attribute binding
::mlir::IntegerAttrindex attribute descriptor_type
::mlir::iree_compiler::IREE::HAL::DescriptorTypeAttrvalid DescriptorType alignment
::mlir::IntegerAttrindex attribute descriptor_flags
::mlir::iree_compiler::IREE::HAL::DescriptorFlagsAttrvalid Descriptor flags"},{"location":"reference/mlir-dialects/HAL/#operands_59","title":"Operands:","text":"Operand Description byte_offset
index dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_35","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceconstantload-halinterfaceconstantloadop","title":"hal.interface.constant.load
(HAL::InterfaceConstantLoadOp)","text":"Loads a constant value from the interface constant block
Syntax:
operation ::= `hal.interface.constant.load` `` `[` $index `]`\n (`alignment` `(` $alignment^ `)`)?\n (`values` `(` $values^ `)`)?\n attr-dict `:` type($result)\n
Loads a scalar constant value from an executable IO push constant block. The value will be loaded from the given constant offset and will be bitcast (possibly with truncation or zero-extension) to the result type.
An optional alignment indicates the byte alignment of potential values for the constant when it could be determined from analysis. If omitted the value may be anything and its interpretation is up to the usage. This is intended to provide pointer alignment-like semantics to constants that are used to index into binding resources.
An optional set of values indicates all possible values that can be passed to the constant from all dispatch sites in the program. If omitted the value may be from an unanalyzable source (outside of the program, indirect, etc) and must be assumed to have any value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrsize_t alignment
::mlir::IntegerAttrindex attribute values
::mlir::ArrayAttrarray attribute"},{"location":"reference/mlir-dialects/HAL/#results_36","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupcount-halinterfaceworkgroupcountop","title":"hal.interface.workgroup.count
(HAL::InterfaceWorkgroupCountOp)","text":"Returns the total workgroup count of the grid
Syntax:
operation ::= `hal.interface.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n
The total number of workgroups along each dimension in the dispatch grid. Matches what was passed to the hal.command_buffer.dispatch
command (or what was indirectly specified).
Corresponds to the NumWorkgroups
SPIR-V built-in and the gridDim
CUDA built-in variable.
%x = hal.interface.workgroup.count[0] : index\n%y = hal.interface.workgroup.count[1] : index\n%z = hal.interface.workgroup.count[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_37","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupid-halinterfaceworkgroupidop","title":"hal.interface.workgroup.id
(HAL::InterfaceWorkgroupIDOp)","text":"Returns the index of the current workgroup in the grid
Syntax:
operation ::= `hal.interface.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n
The global workgroup ID of the current tile in the range of [0, hal.interface.workgroup.count)
along each XYZ dimension.
Corresponds to the WorkgroupId
SPIR-V built-in and the blockIdx
CUDA built-in variable.
%x = hal.interface.workgroup.id[0] : index\n%y = hal.interface.workgroup.id[1] : index\n%z = hal.interface.workgroup.id[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_38","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupsize-halinterfaceworkgroupsizeop","title":"hal.interface.workgroup.size
(HAL::InterfaceWorkgroupSizeOp)","text":"Returns the size of each workgroup in invocations
Syntax:
operation ::= `hal.interface.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n
The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.
Corresponds to the WorkgroupSize
SPIR-V built-in and the blockDim
CUDA built-in variable.
%x = hal.interface.workgroup.size[0] : index\n%y = hal.interface.workgroup.size[1] : index\n%z = hal.interface.workgroup.size[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_39","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#pipeline-layout-ops","title":"Pipeline layout ops","text":"Ops for !hal.pipeline_layout
/ iree_hal_pipeline_layout_t
.
"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutcreate-halpipelinelayoutcreateop","title":"hal.pipeline_layout.create
(HAL::PipelineLayoutCreateOp)","text":"Creates a pipeline layout
Syntax:
operation ::= `hal.pipeline_layout.create` `device` `(` $device `:` type($device) `)`\n `push_constants` `(` $push_constants `)`\n `layouts` `(` `[` $set_layouts `]` `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a pipeline layout from the given descriptor sets and push constant required size. Pipeline layouts can be shared across any executable that uses the same layout and push constant information. Sharing the layout between executables will reduce runtime binding overhead and it is often worth the cost to allow a small number of unused bindings in one executable such that it can share layouts with others that will be scheduled adjacent to it.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription push_constants
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_60","title":"Operands:","text":"Operand Description device
device set_layouts
variadic of descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#results_40","title":"Results:","text":"Result Description result
pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutlookup-halpipelinelayoutlookupop","title":"hal.pipeline_layout.lookup
(HAL::PipelineLayoutLookupOp)","text":"Pipeline layout cache lookup pseudo-op
Syntax:
operation ::= `hal.pipeline_layout.lookup` `device` `(` $device `:` type($device) `)`\n `layout` `(` $layout `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized pipeline layout.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription layout
::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification"},{"location":"reference/mlir-dialects/HAL/#operands_61","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_41","title":"Results:","text":"Result Description result
pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#pseudo-ops","title":"Pseudo Ops","text":"Pseudo ops for conversion support.
"},{"location":"reference/mlir-dialects/HAL/#haldispatchextern-haldispatchexternop","title":"hal.dispatch.extern
(HAL::DispatchExternOp)","text":"A dispatch of workgroups across a 3-dimensional grid
Syntax:
operation ::= `hal.dispatch.extern` $export\n (`[` $workload^ `]`)? ``\n `(` $arguments `)` `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n `count` `` custom<WorkgroupCountRegion>($workgroup_count)\n `layout` `(` $layout `)`\n (`bindings` `(` $bindings^ `)`)?\n `objects` `(` $objects `)`\n attr-dict-with-keyword\n
Dispatches some number of workgroups across a 3-dimensional grid using a function defined externally in one or more referenced objects. Objects are declared per executable target and selected automatically during linking based on where the dispatch is used. Semantically this is equivalent to a flow.dispatch.workgroups
but with the workgroup region invisible to the compiler. See hal.executable
for more information about object linkage.
Note that since this happens at tensor level the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors. Codegen guarantees this behavior but when sourcing externally authored dispatch functions it's critical that this behavior is observed.
Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the platform-dependent primitives for accessing workgroup ID, size, and count intrinsics instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc). To aid deduplication code producing these external dispatches should try not to specialize early for particular shapes and instead emit the most generic code possible as having 500 slightly different hal.dispatch.extern
ops pointing at the same object file is likely to require 500 copies of the object instead of 500 calls to the same object.
Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it. If workgroup size or distribution varies based on the target device a !hal.device
argument can be used by the workgroup count calculation region to factor in device parameters. See hal.device.query
for more information on how to query information.
%r = hal.dispatch.extern \"some_function\"[%c5, %c5](%0, %1)\n : (tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32>\n ...\n
The number of results of the operation is equal to the number of results in the type signature ((tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32>
). Each tensor argument and result in the type signature has a corresponding pipeline layout slot and must be declared. If multiple arguments or results share the same layout slot they can be aliased using the bindings
attribute and otherwise each is assumed unique.
There are no arguments
operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, -> %0
would tie the first argument to the result. In that case, there would be no separate block argument for the result.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, IsolatedFromAbove
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription export
::mlir::StringAttrstring attribute layout
::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification objects
::mlir::iree_compiler::IREE::HAL::ExecutableObjectsAttrtarget-specific object file references workgroup_size
::mlir::ArrayAttrindex array attribute subgroup_size
::mlir::IntegerAttrsize_t workgroup_local_memory
::mlir::IntegerAttrindex attribute bindings
::mlir::ArrayAttrHAL binding array attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_62","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_42","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/HAL/#haltensorbarrier-haltensorbarrierop","title":"hal.tensor.barrier
(HAL::TensorBarrierOp)","text":"Signals a fence when all tensors are available
Syntax:
operation ::= `hal.tensor.barrier` `join` `` `(` $sources `:` type($sources) `)`\n `=` `` `>`\n $signal_fence `:` type($signal_fence)\n attr-dict-with-keyword\n
Defines a barrier that is used to indicate availability of an entire set of tensors by signaling a fence. The source tensors are returned for chaining.
Interfaces: TiedOpInterface
"},{"location":"reference/mlir-dialects/HAL/#operands_63","title":"Operands:","text":"Operand Description sources
variadic of tensor of any type values signal_fence
fence"},{"location":"reference/mlir-dialects/HAL/#results_43","title":"Results:","text":"Result Description results
variadic of tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#haltensorexport-haltensorexportop","title":"hal.tensor.export
(HAL::TensorExportOp)","text":"Exports a tensor to a HAL buffer view
Syntax:
operation ::= `hal.tensor.export` $source\n ($name^)?\n (`into` `(` $target_storage^ `:` type($target_storage) `)`)?\n `:`\n custom<TypeAlias>($source_encoding, type($source)) (`{` $source_dims^ `}`)?\n `->`\n type($target)\n attr-dict\n
Defines an export of an SSA-form tensor to an external HAL buffer view.
The provided source_encoding
, if different from the source
type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.
An optional target_storage
buffer can be provided to hold the exported result. The export will fail at runtime if the storage is null or if it has insufficient capacity to store the output. The storage must be device-visible and defined for transfer-target and dispatch usage.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_64","title":"Operands:","text":"Operand Description source
tensor of any type values source_dims
variadic of index target_storage
buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_44","title":"Results:","text":"Result Description target
buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#haltensorimport-haltensorimportop","title":"hal.tensor.import
(HAL::TensorImportOp)","text":"Imports a tensor from a HAL buffer view
Syntax:
operation ::= `hal.tensor.import` (`wait` `(` $wait_fence^ `)` `=` `` `>`)?\n $source\n ($name^)?\n `:` type($source) `->`\n custom<TypeAlias>($target_encoding, type($target)) (`{` $target_dims^ `}`)?\n attr-dict\n
Defines an import of an external HAL buffer view into an SSA-form tensor. An optional semaphore timepoint can be specified indicating when the buffer view is available for use. If no semaphore timepoint is provided it is assumed the buffer view is immediately available.
The provided target_encoding
, if different from the target
type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_65","title":"Operands:","text":"Operand Description source
buffer or buffer_view target_dims
variadic of index wait_fence
fence"},{"location":"reference/mlir-dialects/HAL/#results_45","title":"Results:","text":"Result Description target
tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/HAL/#affinityqueueattr","title":"AffinityQueueAttr","text":"specifies a set of allowed queues for an operation
WIP; see #10765. This may change in the future to either be a nested attribute on a larger affinity struct or be defined by an implementation of the affinity attr interface. For now this allows higher levels of the stack to specify queues such that the stream dialect can understand them and they can be lowered into the HAL dialect.
Specifies that an annotated operation or scope is only allowed to execute on the set of queues (0-64) provided. Operations will not run on other queues.
Example:
// any queue\n#hal.affinity.queue<*>\n// queues 4 and 5\n#hal.affinity.queue<[4, 5]>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters","title":"Parameters:","text":"Parameter C++ type Description mask int64_t
"},{"location":"reference/mlir-dialects/HAL/#collectiveattr","title":"CollectiveAttr","text":"collective operation and specification
Syntax:
#hal.collective<\n CollectiveKind, # kind\n std::optional<CollectiveReductionOp>, # reduction\n CollectiveElementType # element_type\n>\n
Specifies the collective operation to perform and any mode bits required.
"},{"location":"reference/mlir-dialects/HAL/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description kind CollectiveKind
reduction std::optional<CollectiveReductionOp>
element_type CollectiveElementType
"},{"location":"reference/mlir-dialects/HAL/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"descriptor set binding specification
Syntax:
#hal.descriptor_set.binding<\n int64_t, # ordinal\n DescriptorType, # type\n std::optional<DescriptorFlags> # flags\n>\n
Specifies a single binding within a descriptor set layout.
"},{"location":"reference/mlir-dialects/HAL/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
type DescriptorType
flags std::optional<DescriptorFlags>
"},{"location":"reference/mlir-dialects/HAL/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"descriptor set layout specification
Syntax:
#hal.descriptor_set.layout<\n int64_t, # ordinal\n ::llvm::ArrayRef<DescriptorSetBindingAttr>, # bindings\n std::optional<DescriptorSetLayoutFlags> # flags\n>\n
Specifies the layout information of a single set of descriptors used within a pipeline layout. Multiple of these sets may be used by a single entry point to allow for bindings with similar update frequencies to be grouped.
"},{"location":"reference/mlir-dialects/HAL/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
bindings ::llvm::ArrayRef<DescriptorSetBindingAttr>
flags std::optional<DescriptorSetLayoutFlags>
"},{"location":"reference/mlir-dialects/HAL/#descriptortypeattr","title":"DescriptorTypeAttr","text":"valid DescriptorType
Syntax:
#hal.descriptor_type<\n ::mlir::iree_compiler::IREE::HAL::DescriptorType # value\n>\n
Enum cases: * uniform_buffer (UniformBuffer
) * storage_buffer (StorageBuffer
)
"},{"location":"reference/mlir-dialects/HAL/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::HAL::DescriptorType
an enum of type DescriptorType"},{"location":"reference/mlir-dialects/HAL/#devicematcharchitectureattr","title":"DeviceMatchArchitectureAttr","text":"matches against a device architecture pattern
Matches a device by its runtime architecture. The format of the architecture pattern is device-dependent.
"},{"location":"reference/mlir-dialects/HAL/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicematchexecutableformatattr","title":"DeviceMatchExecutableFormatAttr","text":"matches when a device supports the given executable format
Matches a device only if it claims to support the given executable format pattern. It's still possible that the executable cannot be loaded such as if it uses unavailable device features. This is used for queries such as \"can you load ELF libraries?\" to quickly get to a set of executables to attempt without needing to try dozens that definitely cannot be loaded.
Note that different devices may share the same executable formats: for example a local synchronous CPU executor and a remote asynchronous CPU executor can both load ELF libraries. It's also possible for the same device to support multiple formats such as being able to load both platform-agnostic ELF libraries and platform-specific DLL/MachO/etc libraries.
"},{"location":"reference/mlir-dialects/HAL/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicematchfeatureattr","title":"DeviceMatchFeatureAttr","text":"matches against a supported device feature pattern
Matches a device that supports the given feature. The format of the feature pattern is device-dependent.
"},{"location":"reference/mlir-dialects/HAL/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicematchidattr","title":"DeviceMatchIDAttr","text":"matches against a device ID pattern
Matches a device by its canonical compiler/runtime ID.
"},{"location":"reference/mlir-dialects/HAL/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicetargetattr","title":"DeviceTargetAttr","text":"generic device target specification
Specifies the properties of a target runtime device. Target devices are specified with a canonical identifier matching those used by the runtime (such as cpu
, vulkan
, etc). Target devices may support several target executable formats specified with #hal.executable.target
. An optional configuration dictionary allows for overriding backend defaults.
Example:
#hal.device.target<\"llvm-cpu\", {\n executable_targets = [\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-arm_32\">,\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-arm_64\">,\n ]\n}>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description deviceID StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/HAL/#executableobjectattr","title":"ExecutableObjectAttr","text":"object file reference
Defines an object file that can be linked into executables. Today this is only supported for external file references with paths the compiler can successfully resolve from its current working directory. Inlined data can optionally be provided to avoid the need for file system access and ensure the data source is attached to the IR as it makes its way through multiple compiler stages or reproducers.
Future revisions may change this to an interface that allows both internal and external resources to define the object contents. Linking needs to be updated to support various object compositions and certain backends may require additional infrastructure support.
In the long term the goal is to allow combinations of declared objects and generated code in order to give control of linking behavior to frontends. Instead of needing global command line flags to link in additional blobs the frontend can emit executables with the dependencies already defined per variant without needing to reach into the IREE compiler code.
Example:
#hal.executable.object<{path = \"some/file.obj\"}>\n#hal.executable.object<{\n path = \"some/embedded/file.obj\",\n data = dense<[...]> : vector<2048xi8>\n}>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_10","title":"Parameters:","text":"Parameter C++ type Description path StringAttr
data DenseIntElementsAttr
"},{"location":"reference/mlir-dialects/HAL/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"target-specific object file references
A dictionary mapping executable target specifications to a list of objects. This is used to allow layers of the stack that support multi-targeting to specify information used during lowering into each particular target.
The key attributes are matched against each target variant based on the backend and format as well as any configuration data provided. When comparing the configuration only fields present in both the key and target variant will be checked and must match. This allows specification of generic sets (\"all x86_64 targets get these objects\") as well as specific ones (\"only x86_64 targets with vector_size = 64 get these objects\").
Example:
#hal.executable.objects<{\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-arm_64\"> = [\n #hal.executable.object<{path = \"some/file_arm_64.obj\"}>\n ],\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-x86_64\"> = [\n #hal.executable.object<{path = \"some/file_x86_64.obj\"}>\n ]\n}>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_11","title":"Parameters:","text":"Parameter C++ type Description targets ArrayAttr
targetObjects ArrayAttr
"},{"location":"reference/mlir-dialects/HAL/#executabletargetattr","title":"ExecutableTargetAttr","text":"generic executable target specification
Specifies how to compile an executable for a specific target backend. A backend is used to translate and serialize the executable into the final form passed to the runtime. The format of the executable is a target-specific value indicating the required runtime support to load the deployed artifact. An optionally provided configuration dictionary overrides backend-specific defaults.
Example:
// Produce a system-native ELF for x86-64 systems using the LLVM backend:\n #hal.executable.target<\"llvm-cpu\", \"system-elf-x86_64\", {\n triple = \"x86_64-unknown-linux-elf\",\n cpu = \"host\",\n cpu_features = \"host\",\n abi = \"lp32\",\n ...\n }>\n
The same compilation backend may be used to translate executables for several different runtime devices. Likewise the same runtime device may use one of many different executable targets. Assume an N:M mapping between the two in all cases.
"},{"location":"reference/mlir-dialects/HAL/#parameters_12","title":"Parameters:","text":"Parameter C++ type Description backend StringAttr
format StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/HAL/#interfacebindingattr","title":"InterfaceBindingAttr","text":"interface binding specification
Syntax:
#hal.interface.binding<\n int64_t, # set\n int64_t # binding\n>\n
Specifies the descriptor set and binding ordinal of a particular layout binding.
Example:
#hal.interface.binding<0, 1>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_13","title":"Parameters:","text":"Parameter C++ type Description set int64_t
binding int64_t
"},{"location":"reference/mlir-dialects/HAL/#matchallattr","title":"MatchAllAttr","text":"matches if all subexpressions match
Returns true only if all subexpressions return true (logical AND) or empty.
"},{"location":"reference/mlir-dialects/HAL/#parameters_14","title":"Parameters:","text":"Parameter C++ type Description conditions ArrayAttr
"},{"location":"reference/mlir-dialects/HAL/#matchalwaysattr","title":"MatchAlwaysAttr","text":"always matches
Syntax: #hal.match.always
Returns true (constant true).
"},{"location":"reference/mlir-dialects/HAL/#matchanyattr","title":"MatchAnyAttr","text":"matches if any subexpression matches
Returns true if any subexpression matches (logical OR) and not empty.
"},{"location":"reference/mlir-dialects/HAL/#parameters_15","title":"Parameters:","text":"Parameter C++ type Description conditions ArrayAttr
"},{"location":"reference/mlir-dialects/HAL/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"executable entry point layout specification
Syntax:
#hal.pipeline.layout<\n int64_t, # pushConstants\n ::llvm::ArrayRef<DescriptorSetLayoutAttr> # setLayouts\n>\n
Specifies the layout information used for interacting with executable functions. This allows host code to correctly map parameters to the lower-level target-specific argument passing behavior.
"},{"location":"reference/mlir-dialects/HAL/#parameters_16","title":"Parameters:","text":"Parameter C++ type Description pushConstants int64_t
setLayouts ::llvm::ArrayRef<DescriptorSetLayoutAttr>
"},{"location":"reference/mlir-dialects/HAL/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/HAL/#allocator","title":"allocator","text":"Allocates buffers for a particular device memory space.
"},{"location":"reference/mlir-dialects/HAL/#buffer","title":"buffer","text":"A memory buffer with a specific memory_type that is used to describe the capabilities and behavior of the backing memory of the buffer. Buffers may be any mix of host-accessible, host-coherent, or device-accessible for various usages. Depending on these memory types the buffers may be mapped for access on the host as memory though certain restrictions may be imposed.
"},{"location":"reference/mlir-dialects/HAL/#buffer_view","title":"buffer_view","text":"A shaped and typed buffer reference. This just wraps an existing hal.buffer with its associated metadata to make it easier to pass across ABI boundaries. In most cases buffer views can be elided entirely by the compiler and they'll only be seen when calling external functions.
"},{"location":"reference/mlir-dialects/HAL/#collectivechannel","title":"collective.channel","text":"Channel identifier used to allow for participation in multiple collective groups.
"},{"location":"reference/mlir-dialects/HAL/#command_buffer","title":"command_buffer","text":"Asynchronous command buffer recording interface. Commands are recorded by the implementation for later submission to command queues.
"},{"location":"reference/mlir-dialects/HAL/#descriptor_set_layout","title":"descriptor_set_layout","text":"Descriptor set layout.
"},{"location":"reference/mlir-dialects/HAL/#device","title":"device","text":"Logical device instance.
"},{"location":"reference/mlir-dialects/HAL/#event","title":"event","text":"Events are used for defining synchronization scopes within CommandBuffers. An event only exists within a single CommandBuffer and must not be used across CommandBuffers from the same device or others.
"},{"location":"reference/mlir-dialects/HAL/#executable","title":"executable","text":"A prepared and ready-to-dispatch executable.
"},{"location":"reference/mlir-dialects/HAL/#fence","title":"fence","text":"A set of semaphore timepoints defining a common point in time across multiple timelines.
"},{"location":"reference/mlir-dialects/HAL/#buffer_1","title":"buffer","text":"A stateless file handle that can be read/written using queue-ordered transfer operations.
"},{"location":"reference/mlir-dialects/HAL/#pipeline_layout","title":"pipeline_layout","text":"A pipeline layout describing the descriptor sets and push constants used.
"},{"location":"reference/mlir-dialects/HALInline/","title":"HAL/Inline","text":""},{"location":"reference/mlir-dialects/HALInline/#hal_inline-dialect","title":"'hal_inline' Dialect","text":"IREE inline HAL interop runtime module dialect.
Low-level dialect for limited in-process ABI interop with the full HAL. Only operates synchronously, single-threaded, and on host-local buffers. Use the full HAL for all other cases.
This dialect can be used alongside the full HAL but is intended for use in standalone configurations or paired with the hal_loader
dialect which also carries the same usage restrictions.
See hal_inline.imports.mlir
for the full list of exported functions.
- 'hal_inline' Dialect
- Operation definition
- Buffer ops
- hal_inline.buffer.allocate.initialized (HAL::Inline::BufferAllocateInitializedOp)
- hal_inline.buffer.allocate (HAL::Inline::BufferAllocateOp)
- hal_inline.buffer.length (HAL::Inline::BufferLengthOp)
- hal_inline.buffer.storage (HAL::Inline::BufferStorageOp)
- hal_inline.buffer.subspan (HAL::Inline::BufferSubspanOp)
- hal_inline.buffer.wrap (HAL::Inline::BufferWrapOp)
- Buffer view ops
- hal_inline.buffer_view.assert (HAL::Inline::BufferViewAssertOp)
- hal_inline.buffer_view.buffer (HAL::Inline::BufferViewBufferOp)
- hal_inline.buffer_view.create (HAL::Inline::BufferViewCreateOp)
- hal_inline.buffer_view.dim (HAL::Inline::BufferViewDimOp)
- hal_inline.buffer_view.element_type (HAL::Inline::BufferViewElementTypeOp)
- hal_inline.buffer_view.encoding_type (HAL::Inline::BufferViewEncodingTypeOp)
- hal_inline.buffer_view.rank (HAL::Inline::BufferViewRankOp)
- hal_inline.buffer_view.trace (HAL::Inline::BufferViewTraceOp)
- Device ops
- hal_inline.device.query (HAL::Inline::DeviceQueryOp)
"},{"location":"reference/mlir-dialects/HALInline/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/HALInline/#buffer-ops","title":"Buffer ops","text":"Ops for !hal.buffer
/ iree_hal_buffer_t
.
"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocateinitialized-halinlinebufferallocateinitializedop","title":"hal_inline.buffer.allocate.initialized
(HAL::Inline::BufferAllocateInitializedOp)","text":"Buffer allocation with cloning
Syntax:
operation ::= `hal_inline.buffer.allocate.initialized` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n `alignment` `(` $minimum_alignment `)`\n `:` custom<SizeAwareType>(type($result), ref($length)) `in` type($storage)\n attr-dict-with-keyword\n
Allocates a buffer with a copy of the provided contents.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HALInline/#operands","title":"Operands:","text":"Operand Description minimum_alignment
index source
a reference counted byte buffer offset
index length
index"},{"location":"reference/mlir-dialects/HALInline/#results","title":"Results:","text":"Result Description result
buffer storage
a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocate-halinlinebufferallocateop","title":"hal_inline.buffer.allocate
(HAL::Inline::BufferAllocateOp)","text":"Empty buffer allocation operation
Syntax:
operation ::= `hal_inline.buffer.allocate` `alignment` `(` $minimum_alignment `)`\n `:` custom<SizeAwareType>(type($result), $allocation_size) `in` type($storage)\n attr-dict-with-keyword\n
Allocates a buffer of the given size. The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HALInline/#operands_1","title":"Operands:","text":"Operand Description minimum_alignment
index allocation_size
index"},{"location":"reference/mlir-dialects/HALInline/#results_1","title":"Results:","text":"Result Description result
buffer storage
a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferlength-halinlinebufferlengthop","title":"hal_inline.buffer.length
(HAL::Inline::BufferLengthOp)","text":"Buffer byte length accessor
Syntax:
operation ::= `hal_inline.buffer.length` `<` $buffer `:` type($buffer) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the allocated size of a buffer in bytes. May be less than the underlying buffer allocation if this is a subspan or view into another buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_2","title":"Operands:","text":"Operand Description buffer
buffer"},{"location":"reference/mlir-dialects/HALInline/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferstorage-halinlinebufferstorageop","title":"hal_inline.buffer.storage
(HAL::Inline::BufferStorageOp)","text":"Buffer backing storage accessor
Syntax:
operation ::= `hal_inline.buffer.storage` `<` $buffer `:` type($buffer) `>`\n `:` type($storage)\n attr-dict-with-keyword\n
Returns the host backing storage of the HAL buffer as a subspan limited to the buffer's logical range (meaning that byte 0 of the returned buffer is byte 0 of the HAL buffer).
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_3","title":"Operands:","text":"Operand Description buffer
buffer"},{"location":"reference/mlir-dialects/HALInline/#results_3","title":"Results:","text":"Result Description storage
a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffersubspan-halinlinebuffersubspanop","title":"hal_inline.buffer.subspan
(HAL::Inline::BufferSubspanOp)","text":"Buffer subspan operation
Syntax:
operation ::= `hal_inline.buffer.subspan` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a reference to a subspan of the buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SizeAwareOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_4","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index length
index"},{"location":"reference/mlir-dialects/HALInline/#results_4","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferwrap-halinlinebufferwrapop","title":"hal_inline.buffer.wrap
(HAL::Inline::BufferWrapOp)","text":"Host buffer wrapping operation
Syntax:
operation ::= `hal_inline.buffer.wrap` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Tries wrapping a !hal.buffer around host memory backed by the given byte buffer.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HALInline/#operands_5","title":"Operands:","text":"Operand Description source
a reference counted byte buffer offset
index length
index"},{"location":"reference/mlir-dialects/HALInline/#results_5","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HALInline/#buffer-view-ops","title":"Buffer view ops","text":"Ops for !hal.buffer_view
/ iree_hal_buffer_view_t
.
"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewassert-halinlinebufferviewassertop","title":"hal_inline.buffer_view.assert
(HAL::Inline::BufferViewAssertOp)","text":"Buffer view contents assertion
Syntax:
operation ::= `hal_inline.buffer_view.assert` `<` $buffer_view `:` type($buffer_view) `>`\n `message` `(` $message `)`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n attr-dict-with-keyword\n
Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if std.assert
had been used.
"},{"location":"reference/mlir-dialects/HALInline/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_6","title":"Operands:","text":"Operand Description buffer_view
buffer_view element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewbuffer-halinlinebufferviewbufferop","title":"hal_inline.buffer_view.buffer
(HAL::Inline::BufferViewBufferOp)","text":"Buffer view buffer accessor
Syntax:
operation ::= `hal_inline.buffer_view.buffer` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the buffer backing this view's contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_7","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_6","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewcreate-halinlinebufferviewcreateop","title":"hal_inline.buffer_view.create
(HAL::Inline::BufferViewCreateOp)","text":"Buffer view reference initializer
Syntax:
operation ::= `hal_inline.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `,` $source_length `]`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_8","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index source_length
index element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HALInline/#results_7","title":"Results:","text":"Result Description result
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewdim-halinlinebufferviewdimop","title":"hal_inline.buffer_view.dim
(HAL::Inline::BufferViewDimOp)","text":"Buffer view dimension value query
Syntax:
operation ::= `hal_inline.buffer_view.dim` `<` $buffer_view `:` type($buffer_view) `>`\n `` `[` $index `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the value of the given dimension.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_9","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_8","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewelement_type-halinlinebufferviewelementtypeop","title":"hal_inline.buffer_view.element_type
(HAL::Inline::BufferViewElementTypeOp)","text":"Buffer view element type query
Syntax:
operation ::= `hal_inline.buffer_view.element_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the element type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_10","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_9","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewencoding_type-halinlinebufferviewencodingtypeop","title":"hal_inline.buffer_view.encoding_type
(HAL::Inline::BufferViewEncodingTypeOp)","text":"Buffer view encoding type query
Syntax:
operation ::= `hal_inline.buffer_view.encoding_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the encoding type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_11","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_10","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewrank-halinlinebufferviewrankop","title":"hal_inline.buffer_view.rank
(HAL::Inline::BufferViewRankOp)","text":"Buffer view rank query
Syntax:
operation ::= `hal_inline.buffer_view.rank` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the rank of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_12","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_11","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewtrace-halinlinebufferviewtraceop","title":"hal_inline.buffer_view.trace
(HAL::Inline::BufferViewTraceOp)","text":"Trace value(s) operation
Syntax:
operation ::= `hal_inline.buffer_view.trace` $key `=`\n $operands `:` type($operands)\n attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. The key is informational only and useful for titling/marking specific sets of buffers for easier searching.
"},{"location":"reference/mlir-dialects/HALInline/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_13","title":"Operands:","text":"Operand Description operands
variadic of buffer_view"},{"location":"reference/mlir-dialects/HALInline/#device-ops","title":"Device ops","text":"Ops for !hal.device
/ iree_hal_device_t
.
"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinedevicequery-halinlinedevicequeryop","title":"hal_inline.device.query
(HAL::Inline::DeviceQueryOp)","text":"Returns a runtime configuration parameter from the device
Syntax:
operation ::= `hal_inline.device.query` `key` `(` $category `:` `` `:` $key `)`\n `:` type($ok) `,` type($value)\n (`=` $default_value^)?\n attr-dict-with-keyword\n
Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and if it was the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.
This is roughly equivalent to the sysconf
linux syscall (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent. If there is a HAL match attribute (#hal.device.match.*
) or op (hal.device.match.*
) prefer to use that in order to get compile-time propagation when the target is specified and elide the runtime query and get compile-time verification when a runtime query is required.
Users of the op must check the ok
result before using the value as what set of keys is available may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. Keys should be prefixed with ex.
when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.
Well-known keys: (none yet)
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription category
::mlir::StringAttrstring attribute key
::mlir::StringAttrstring attribute default_value
::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/HALInline/#results_12","title":"Results:","text":"Result Description ok
1-bit signless integer value
any type"},{"location":"reference/mlir-dialects/HALLoader/","title":"HAL/Loader","text":""},{"location":"reference/mlir-dialects/HALLoader/#hal_loader-dialect","title":"'hal_loader' Dialect","text":"IREE HAL inline executable loader runtime module dialect.
Low-level dialect for dynamically loading executables and dispatching work. Only operates synchronously, single-threaded, and on host-local buffers. Use the full HAL for all other cases.
This dialect can be used alongside the full HAL but is intended for use in conjunction with the hal_inline
dialect which also carries the same usage restrictions.
See hal_loader.imports.mlir
for the full list of exported functions.
- 'hal_loader' Dialect
- Operation definition
- Executable ops
- hal_loader.executable.dispatch (HAL::Loader::ExecutableDispatchOp)
- hal_loader.executable.dispatch.symbol (HAL::Loader::ExecutableDispatchSymbolOp)
- hal_loader.executable.load (HAL::Loader::ExecutableLoadOp)
- hal_loader.executable.lookup (HAL::Loader::ExecutableLookupOp)
- hal_loader.executable.query_support (HAL::Loader::ExecutableQuerySupportOp)
"},{"location":"reference/mlir-dialects/HALLoader/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/HALLoader/#executable-ops","title":"Executable ops","text":"Ops for !hal.executable
/ iree_hal_executable_t
.
"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatch-halloaderexecutabledispatchop","title":"hal_loader.executable.dispatch
(HAL::Loader::ExecutableDispatchOp)","text":"Inline executable dispatch operation
Syntax:
operation ::= `hal_loader.executable.dispatch` `executable` `(` $executable `:` type($executable) `)`\n `` `[` $entry_point `]`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n (`constants` `(` `[` $push_constants^ `]` `)`)?\n `bindings` `(` `[`\n custom<DispatchBindings>($binding_buffers,\n type($binding_buffers),\n $binding_offsets,\n $binding_lengths)\n `]` `)`\n attr-dict-with-keyword\n
Dispatches execution to an executable entry point with the given parameters.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/HALLoader/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HALLoader/#operands","title":"Operands:","text":"Operand Description executable
executable workgroup_x
index workgroup_y
index workgroup_z
index push_constants
variadic of 32-bit signless integer binding_buffers
variadic of a reference counted byte buffer binding_offsets
variadic of index binding_lengths
variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatchsymbol-halloaderexecutabledispatchsymbolop","title":"hal_loader.executable.dispatch.symbol
(HAL::Loader::ExecutableDispatchSymbolOp)","text":"Inline executable dispatch operation
Syntax:
operation ::= `hal_loader.executable.dispatch.symbol` `executable` `(` $executable `:` type($executable) `)`\n `target` `(` $entry_point `)`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n (`constants` `(` `[` $push_constants^ `]` `)`)?\n `bindings` `(` `[`\n custom<DispatchBindings>($binding_buffers,\n type($binding_buffers),\n $binding_offsets,\n $binding_lengths)\n `]` `)`\n attr-dict-with-keyword\n
Dispatches execution to an executable entry point with the given parameters. The entry point is a symbolic reference to an exported entry point.
Traits: AttrSizedOperandSegments
Interfaces: SymbolUserOpInterface
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_1","title":"Operands:","text":"Operand Description executable
executable workgroup_x
index workgroup_y
index workgroup_z
index push_constants
variadic of 32-bit signless integer binding_buffers
variadic of a reference counted byte buffer binding_offsets
variadic of index binding_lengths
variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutableload-halloaderexecutableloadop","title":"hal_loader.executable.load
(HAL::Loader::ExecutableLoadOp)","text":"Dynamically loads an executable
Syntax:
operation ::= `hal_loader.executable.load` `format` `(` $format `)`\n `data` `(` $data `)`\n (`constants` `(` `[` $constants^ `]` `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Creates, loads, and dynamically links an executable.
Optional constants provide for specialization of the executable based on runtime-derived parameters.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription format
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_2","title":"Operands:","text":"Operand Description data
a reference counted byte buffer constants
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HALLoader/#results","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablelookup-halloaderexecutablelookupop","title":"hal_loader.executable.lookup
(HAL::Loader::ExecutableLookupOp)","text":"Executable cache lookup pseudo-op
Syntax:
operation ::= `hal_loader.executable.lookup` `executable` `(` $executable `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SymbolUserOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription executable
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_1","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablequery_support-halloaderexecutablequerysupportop","title":"hal_loader.executable.query_support
(HAL::Loader::ExecutableQuerySupportOp)","text":"Queries whether an executable format is supported
Syntax:
operation ::= `hal_loader.executable.query_support` `format` `(` $executable_format `)`\n `:` type($supported)\n attr-dict-with-keyword\n
Returns true if the given format is supported by the device loader. This does not guarantee that loading will succeed as the executable may require functionality that cannot be met by the hosting runtime environment.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription executable_format
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_2","title":"Results:","text":"Result Description supported
1-bit signless integer"},{"location":"reference/mlir-dialects/IOParameters/","title":"IO/Parameters","text":""},{"location":"reference/mlir-dialects/IOParameters/#io_parameters-dialect","title":"'io_parameters' Dialect","text":"External parameter resource management APIs.
Parameters are externalized storage for resources that are asynchronously accessible and device-aware. Parameters can be read or written on the same device timelines as the operations that consume or produce them and with locality pinning to ensure memory doesn't need to move. Parameters are referenced by a scope and a key, with the scope being optional but strongly recommended as a way to distinguish sets of parameters that may exist when multiple model parts are compiled together and would otherwise collide.
Parameters are provided by a few operations implementing a virtual interface and can support shared parameters (same storage used in multiple contexts, or outliving a single instantiation in a context), in-memory caches, memory-mapped files (including directly using the mapped memory for execution when devices support it), iree_hal_file_t
usage for device-supported I/O, and parameter subsetting for things like runtime sharding.
Alongside read(+load) and write operations gather and scatter allow for batching of large numbers of reads and writes into/from single buffers. For parameter providers that can batch operations this allows for a handful (~1-4) of calls out to perform many more operations (~thousands). Modeling the gather/scatter also gives us a point where we could extract the mapping and use it to repack files/defrag memory in the future.
See io_parameters.imports.mlir
for the full list of exported functions.
- 'io_parameters' Dialect
- Operation definition
- Parameter I/O ops
- io_parameters.gather (IO::Parameters::GatherOp)
- io_parameters.load (IO::Parameters::LoadOp)
- io_parameters.scatter (IO::Parameters::ScatterOp)
"},{"location":"reference/mlir-dialects/IOParameters/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IOParameters/#parameter-io-ops","title":"Parameter I/O ops","text":"Ops for parameter I/O.
"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersgather-ioparametersgatherop","title":"io_parameters.gather
(IO::Parameters::GatherOp)","text":"Gathers multiple parameters from a parameter scope
Syntax:
operation ::= `io_parameters.gather` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `{`\n custom<ParameterGatherOperations>(\n $source_scope, $source_keys, $source_offsets,\n $target_buffer, type($target_buffer), $target_offsets, $target_lengths)\n `}`\n attr-dict-with-keyword\n
Asynchronously gathers one or more parameters into a single target buffer. This is equivalent to one read per parameter but allows implementations that can batch operations to do so without additional overhead.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/IOParameters/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_keys
::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_offsets
variadic of 64-bit signless integer target_buffer
buffer target_offsets
variadic of index target_lengths
variadic of index"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersload-ioparametersloadop","title":"io_parameters.load
(IO::Parameters::LoadOp)","text":"Reads a parameter from a parameter scope
Syntax:
operation ::= `io_parameters.load` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `source` `(` custom<ParameterReference>($source_scope, $source_key) `)`\n `` `[` $source_offset `]`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` custom<SizeAwareType>(type($result), $length)\n attr-dict-with-keyword\n
Asynchronously reads a parameter from an external parameter provider and returns the resulting buffer. Depending on the parameter and buffer types this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if an allocate + read had been used.
Interfaces: Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/IOParameters/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_key
::mlir::StringAttrstring attribute memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/IOParameters/#operands_1","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_offset
64-bit signless integer length
index"},{"location":"reference/mlir-dialects/IOParameters/#results","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersscatter-ioparametersscatterop","title":"io_parameters.scatter
(IO::Parameters::ScatterOp)","text":"Scatters multiple parameters to a parameter scope
Syntax:
operation ::= `io_parameters.scatter` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `{`\n custom<ParameterScatterOperations>(\n $source_buffer, type($source_buffer), $source_offsets, $source_lengths,\n $target_scope, $target_keys, $target_offsets)\n `}`\n attr-dict-with-keyword\n
Asynchronously scatters one or more parameters from a single source buffer into one or more parameters. This is equivalent to one write per parameter but allows implementations that can batch operations to do so without additional overhead.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/IOParameters/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription target_scope
::mlir::StringAttrstring attribute target_keys
::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands_2","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_buffer
buffer source_offsets
variadic of index source_lengths
variadic of index target_offsets
variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/IREEInput/","title":"IREEInput","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_input-dialect","title":"'iree_input' Dialect","text":"Public ops/type/attributes legal for input to IREE's compiler.
IREE's compiler allows as input a number of common dialects. This dialect contains structural and unique ops that do not exist elsewhere or that IREE has an interest in maintaining as a stable set.
The contents of this dialect often mirror various constructs in IREE's internal implementation. The focus here is on simplicity and stability over time. Generally, this dialect does not use \"advanced\" features and should be broadly source compatible over a range of LLVM versions. There are of course, limits, and source-compatibility is not guaranteed, since LLVM/MLIR's API surface is itself unstable.
- 'iree_input' Dialect
- Operation definition
- Buffer and buffer view ops
- iree_input.buffer.subspan (Input::BufferSubspanOp)
- iree_input.buffer_view.create (Input::BufferViewCreateOp)
- iree_input.buffer_view.dim (Input::BufferViewDimOp)
- iree_input.buffer_view.rank (Input::BufferViewRankOp)
- Byte buffer ops
- iree_input.byte_buffer.constant (Input::ByteBufferConstantOp)
- Compiler hint ops
- iree_input.optimization_barrier (Input::OptimizationBarrierOp)
- Dispatch ops
- iree_input.dispatch (Input::DispatchOp)
- Executable source ops
- iree_input.executable.export (Input::ExecutableExportOp)
- iree_input.executable.source_end (Input::ExecutableSourceEndOp)
- iree_input.executable.source (Input::ExecutableSourceOp)
- Global variable ops
- iree_input.global.address (Input::GlobalAddressOp)
- iree_input.global.load.indirect (Input::GlobalLoadIndirectOp)
- iree_input.global.load (Input::GlobalLoadOp)
- iree_input.global (Input::GlobalOp)
- iree_input.global.store.indirect (Input::GlobalStoreIndirectOp)
- iree_input.global.store (Input::GlobalStoreOp)
- Mutable list ops
- iree_input.list.create (Input::ListCreateOp)
- iree_input.list.get (Input::ListGetOp)
- iree_input.list.resize (Input::ListResizeOp)
- iree_input.list.set (Input::ListSetOp)
- iree_input.list.size (Input::ListSizeOp)
- Pseudo ops for conversion support
- iree_input.tensor.export (Input::TensorExportOp)
- iree_input.tensor.import (Input::TensorImportOp)
- Tensor ops
- iree_input.tensor.bitcast (Input::TensorBitCastOp)
- iree_input.tensor.clone (Input::TensorCloneOp)
- iree_input.tensor.load (Input::TensorLoadOp)
- iree_input.tensor.reshape (Input::TensorReshapeOp)
- iree_input.tensor.slice (Input::TensorSliceOp)
- iree_input.tensor.splat (Input::TensorSplatOp)
- iree_input.tensor.store (Input::TensorStoreOp)
- iree_input.tensor.trace (Input::TensorTraceOp)
- iree_input.tensor.update (Input::TensorUpdateOp)
- Utility ops
- iree_input.align (Input::AlignOp)
- iree_input.null (Input::NullOp)
- Workgroup dispatch ops
- iree_input.dispatch.workgroup.count (Input::DispatchWorkgroupCountOp)
- iree_input.dispatch.workgroup.id (Input::DispatchWorkgroupIDOp)
- iree_input.dispatch.workgroup.size (Input::DispatchWorkgroupSizeOp)
- Attribute definition
- DescriptorSetBindingAttr
- DescriptorSetLayoutAttr
- DescriptorTypeAttr
- DeviceTargetAttr
- ExecutableObjectAttr
- ExecutableObjectsAttr
- ExecutableTargetAttr
- PipelineLayoutAttr
- Type constraint definition
- list
- Type definition
- BufferType
- BufferViewType
- ByteBufferType
- ListType
- PtrType
- VariantType
"},{"location":"reference/mlir-dialects/IREEInput/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffer-and-buffer-view-ops","title":"Buffer and buffer view ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffersubspan-inputbuffersubspanop","title":"iree_input.buffer.subspan
(Input::BufferSubspanOp)","text":"Buffer subspan operation
Syntax:
operation ::= `iree_input.buffer.subspan` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a reference to a subspan of the buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands","title":"Operands:","text":"Operand Description source_buffer
Buffer is an untyped bag of bits with no shape or dtype source_offset
index length
index"},{"location":"reference/mlir-dialects/IREEInput/#results","title":"Results:","text":"Result Description result
Buffer is an untyped bag of bits with no shape or dtype"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewcreate-inputbufferviewcreateop","title":"iree_input.buffer_view.create
(Input::BufferViewCreateOp)","text":"Buffer view reference initializer
Syntax:
operation ::= `iree_input.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `,` $source_length `]`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_1","title":"Operands:","text":"Operand Description source_buffer
Buffer is an untyped bag of bits with no shape or dtype source_offset
index source_length
index element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_1","title":"Results:","text":"Result Description result
View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewdim-inputbufferviewdimop","title":"iree_input.buffer_view.dim
(Input::BufferViewDimOp)","text":"Buffer view dimension value query
Syntax:
operation ::= `iree_input.buffer_view.dim` $buffer_view `,` $index attr-dict `:` type($result)\n
Returns the value of the given dimension.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_2","title":"Operands:","text":"Operand Description buffer_view
View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewrank-inputbufferviewrankop","title":"iree_input.buffer_view.rank
(Input::BufferViewRankOp)","text":"Buffer view rank query
Syntax:
operation ::= `iree_input.buffer_view.rank` $buffer_view attr-dict `:` type($result)\n
Returns the rank of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_3","title":"Operands:","text":"Operand Description buffer_view
View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#results_3","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#byte-buffer-ops","title":"Byte buffer ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbyte_bufferconstant-inputbytebufferconstantop","title":"iree_input.byte_buffer.constant
(Input::ByteBufferConstantOp)","text":"Constant host-side byte buffer
Syntax:
operation ::= `iree_input.byte_buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n
Defines a compile-time byte buffer based on the given attribute value. The attribute will be serialized into the canonical IREE format for the chosen host target.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription name
::mlir::StringAttrstring attribute value
::mlir::StringAttrstring attribute alignment
::mlir::IntegerAttrindex attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_4","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/IREEInput/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputoptimization_barrier-inputoptimizationbarrierop","title":"iree_input.optimization_barrier
(Input::OptimizationBarrierOp)","text":"Prevents compiler optimizations across a value.
Syntax:
operation ::= `iree_input.optimization_barrier` attr-dict\n ($operands^ `:` type($operands))?\n
Wraps any operands in an unoptimizable identity to prevent its results from being folded. It will be dropped during the final step in compilation and has no effect at runtime.
Traits: SameOperandsAndResultType
"},{"location":"reference/mlir-dialects/IREEInput/#operands_4","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#results_5","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatch-inputdispatchop","title":"iree_input.dispatch
(Input::DispatchOp)","text":"A dispatch of an executable across a grid
Syntax:
operation ::= `iree_input.dispatch` $entry_point\n (`[` $workload^ `]`)? ``\n `(` $arguments `)` attr-dict `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), SymbolUserOpInterface, TiedOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_5","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_6","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#executable-source-ops","title":"Executable source ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutableexport-inputexecutableexportop","title":"iree_input.executable.export
(Input::ExecutableExportOp)","text":"Executable entry point declaration
Syntax:
operation ::= `iree_input.executable.export` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n `ordinal` `(` $ordinal `)`\n `layout` `(` $layout `)`\n attr-dict-with-keyword\n
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrsize_t layout
::mlir::iree_compiler::IREE::Input::PipelineLayoutAttrexecutable entry point layout specification workgroup_size
::mlir::ArrayAttrindex array attribute subgroup_size
::mlir::IntegerAttrsize_t workgroup_local_memory
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource_end-inputexecutablesourceendop","title":"iree_input.executable.source_end
(Input::ExecutableSourceEndOp)","text":"Terminator pseudo-op for the executable source op
Syntax:
operation ::= `iree_input.executable.source_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource-inputexecutablesourceop","title":"iree_input.executable.source
(Input::ExecutableSourceOp)","text":"
Generic source contents of an executable op
Syntax:
operation ::= `iree_input.executable.source` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n ``\n regions\n
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute objects
::mlir::iree_compiler::IREE::Input::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/IREEInput/#global-variable-ops","title":"Global variable ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobaladdress-inputglobaladdressop","title":"iree_input.global.address
(Input::GlobalAddressOp)","text":"Returns an address reference to a global
Syntax:
operation ::= `iree_input.global.address` $global attr-dict `:` type($result)\n
Returns the address of a global as a typed reference. Can be used with the global load and store indirect ops.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_7","title":"Results:","text":"Result Description result
ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputgloballoadindirect-inputgloballoadindirectop","title":"iree_input.global.load.indirect
(Input::GlobalLoadIndirectOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `iree_input.global.load.indirect` $global attr-dict `:` type($global) `->` type($result)\n
Returns a copy of the global value.
"},{"location":"reference/mlir-dialects/IREEInput/#operands_6","title":"Operands:","text":"Operand Description global
ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#results_8","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalload-inputgloballoadop","title":"iree_input.global.load
(Input::GlobalLoadOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `iree_input.global.load` $global attr-dict `:` type($result)\n
Returns a copy of the global value.
Interfaces: SymbolUserOpInterface
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_9","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobal-inputglobalop","title":"iree_input.global
(Input::GlobalOp)","text":"Stateful global variable declaration
Syntax:
operation ::= `iree_input.global` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n (`initializer` `(` $initializer^ `)`)?\n custom<TypeOrAttr>($type, $initial_value)\n
Declares a global variable that maintains its value across invocations. The value is tied to the execution context of the module and different contexts will have different global storage.
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initializer
::mlir::FlatSymbolRefAttrflat symbol reference attribute initial_value
::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstoreindirect-inputglobalstoreindirectop","title":"iree_input.global.store.indirect
(Input::GlobalStoreIndirectOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `iree_input.global.store.indirect` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a copy of the value into a global.
"},{"location":"reference/mlir-dialects/IREEInput/#operands_7","title":"Operands:","text":"Operand Description value
any type global
ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstore-inputglobalstoreop","title":"iree_input.global.store
(Input::GlobalStoreOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `iree_input.global.store` $value `,` $global attr-dict `:` type($value)\n
Stores a copy of the value into a global.
Interfaces: SymbolUserOpInterface
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_8","title":"Operands:","text":"Operand Description value
any type"},{"location":"reference/mlir-dialects/IREEInput/#mutable-list-ops","title":"Mutable list ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistcreate-inputlistcreateop","title":"iree_input.list.create
(Input::ListCreateOp)","text":"Creates a new empty list
Syntax:
operation ::= `iree_input.list.create` ($initial_capacity^)? attr-dict `:` type($result)\n
Creates a new empty list with an optional initial capacity.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_9","title":"Operands:","text":"Operand Description initial_capacity
index"},{"location":"reference/mlir-dialects/IREEInput/#results_10","title":"Results:","text":"Result Description result
list"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistget-inputlistgetop","title":"iree_input.list.get
(Input::ListGetOp)","text":"Element accessor
Syntax:
operation ::= `iree_input.list.get` $list `[` $index `]` attr-dict `:` type($list) `->` type($result)\n
Returns the value of the element at the given index. Note that the value may be null if the element is null or the type does not match.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_10","title":"Operands:","text":"Operand Description list
list index
index"},{"location":"reference/mlir-dialects/IREEInput/#results_11","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistresize-inputlistresizeop","title":"iree_input.list.resize
(Input::ListResizeOp)","text":"Resizes the list to a new count in elements
Syntax:
operation ::= `iree_input.list.resize` operands attr-dict `:` type($list)\n
Resizes the list to contain new_size
elements. This will either truncate the list if the existing size is greater than new_size
or extend the list with the default list value of the element type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_11","title":"Operands:","text":"Operand Description list
list new_size
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistset-inputlistsetop","title":"iree_input.list.set
(Input::ListSetOp)","text":"Element mutator
Syntax:
operation ::= `iree_input.list.set` $list `[` $index `]` `,` $value attr-dict `:` type($list) `,` type($value)\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_12","title":"Operands:","text":"Operand Description list
list index
index value
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistsize-inputlistsizeop","title":"iree_input.list.size
(Input::ListSizeOp)","text":"The size of the list in elements
Syntax:
operation ::= `iree_input.list.size` operands attr-dict `:` type($list)\n
Returns the current size of the list in elements.
Interfaces: InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_13","title":"Operands:","text":"Operand Description list
list"},{"location":"reference/mlir-dialects/IREEInput/#results_12","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#pseudo-ops-for-conversion-support","title":"Pseudo ops for conversion support","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorexport-inputtensorexportop","title":"iree_input.tensor.export
(Input::TensorExportOp)","text":"Exports a tensor to a Buffer(View), capturing dynamic dims
Syntax:
operation ::= `iree_input.tensor.export` $source `:` type($source) (`{` $source_dims^ `}`)? `->` type($target)\n attr-dict-with-keyword\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_14","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_13","title":"Results:","text":"Result Description target
Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorimport-inputtensorimportop","title":"iree_input.tensor.import
(Input::TensorImportOp)","text":"Imports a Buffer(View) to a tensor, providing dynamic dims
Syntax:
operation ::= `iree_input.tensor.import` $source `:` type($source) `->` type($target) (`{` $target_dims^ `}`)?\n attr-dict-with-keyword\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_15","title":"Operands:","text":"Operand Description source
Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type target_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_14","title":"Results:","text":"Result Description target
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorbitcast-inputtensorbitcastop","title":"iree_input.tensor.bitcast
(Input::TensorBitCastOp)","text":"Bitcasts a tensor
Syntax:
operation ::= `iree_input.tensor.bitcast` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Bitcasts a tensor to a new shape without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_16","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_15","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorclone-inputtensorcloneop","title":"iree_input.tensor.clone
(Input::TensorCloneOp)","text":"Performs a full tensor clone operation
Syntax:
operation ::= `iree_input.tensor.clone` $operand `:` type($result) (`{` $operand_dims^ `}`)?\n attr-dict-with-keyword\n
Clones the input tensor into an identical output tensor.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_17","title":"Operands:","text":"Operand Description operand
ranked tensor of any type values operand_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_16","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorload-inputtensorloadop","title":"iree_input.tensor.load
(Input::TensorLoadOp)","text":"Loads a value from a tensor element
Syntax:
operation ::= `iree_input.tensor.load` $source (`[` $indices^ `]`)? `:`\n type($source) (`{` $source_dims^ `}`)?\n attr-dict-with-keyword\n
Returns the element at the given location from within the tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_18","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_17","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorreshape-inputtensorreshapeop","title":"iree_input.tensor.reshape
(Input::TensorReshapeOp)","text":"Reshapes a tensor
Syntax:
operation ::= `iree_input.tensor.reshape` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Reshapes a tensor to a new shape without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_19","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_18","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorslice-inputtensorsliceop","title":"iree_input.tensor.slice
(Input::TensorSliceOp)","text":"Slices out a subregion of a tensor
Syntax:
operation ::= `iree_input.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Clones a subregion of a tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_20","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index start_indices
variadic of index lengths
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_19","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorsplat-inputtensorsplatop","title":"iree_input.tensor.splat
(Input::TensorSplatOp)","text":"Splats a value into a shaped tensor
Syntax:
operation ::= `iree_input.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor initialized to the given primitive value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_21","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_20","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorstore-inputtensorstoreop","title":"iree_input.tensor.store
(Input::TensorStoreOp)","text":"Stores a value into a tensor element
Syntax:
operation ::= `iree_input.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n type($target) (`{` $target_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor with the element at the given index set to the given value.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_22","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type or vector of any type values target
ranked tensor of any type values target_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_21","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensortrace-inputtensortraceop","title":"iree_input.tensor.trace
(Input::TensorTraceOp)","text":"Traces one or more tensor values at runtime
Syntax:
operation ::= `iree_input.tensor.trace` $key `=` `[`\n custom<ShapedOperandList>($values, type($values), $value_dims)\n `]` attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_23","title":"Operands:","text":"Operand Description values
variadic of ranked tensor of any type values value_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorupdate-inputtensorupdateop","title":"iree_input.tensor.update
(Input::TensorUpdateOp)","text":"Updates a tensor with the contents of another tensor
Syntax:
operation ::= `iree_input.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n type($update) (`{` $update_dims^ `}`)? `->`\n custom<ShapedTiedResult>(type($result), $target_dims)\n attr-dict-with-keyword\n
Updates the target tensor with the contents of the update tensor at the given offset indices.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_24","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index start_indices
variadic of index update
ranked tensor of any type values update_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_22","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputalign-inputalignop","title":"iree_input.align
(Input::AlignOp)","text":"Aligns up to a power-of-two alignment if required
Syntax:
operation ::= `iree_input.align` $value `,` $alignment attr-dict `:` type($result)\n
Aligns |value| up to the given power-of-two |alignment| if required.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_25","title":"Operands:","text":"Operand Description value
signless-integer-like alignment
signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#results_23","title":"Results:","text":"Result Description result
signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputnull-inputnullop","title":"iree_input.null
(Input::NullOp)","text":"A null value
Syntax:
operation ::= `iree_input.null` attr-dict `:` type($result)\n
Initializes reference and variant types with a null value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#results_24","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#workgroup-dispatch-ops","title":"Workgroup dispatch ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupcount-inputdispatchworkgroupcountop","title":"iree_input.dispatch.workgroup.count
(Input::DispatchWorkgroupCountOp)","text":"Returns the total workgroup count of the grid
Syntax:
operation ::= `iree_input.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n
The total number of workgroups along each dimension in the dispatch grid.
Corresponds to the NumWorkgroups
SPIR-V built-in and the gridDim
CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).
%x = iree_input.dispatch.workgroup.count[0] : index\n%y = iree_input.dispatch.workgroup.count[1] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_25","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupid-inputdispatchworkgroupidop","title":"iree_input.dispatch.workgroup.id
(Input::DispatchWorkgroupIDOp)","text":"Returns the index of the current workgroup in the grid
Syntax:
operation ::= `iree_input.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n
The global workgroup ID of the current workgroup in the range of [0, iree_input.dispatch.workgroup.count)
along each dimension.
Corresponds to the WorkgroupId
SPIR-V built-in and the blockIdx
CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).
%x = iree_input.dispatch.workgroup.id[0] : index\n%y = iree_input.dispatch.workgroup.id[1] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_26","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupsize-inputdispatchworkgroupsizeop","title":"iree_input.dispatch.workgroup.size
(Input::DispatchWorkgroupSizeOp)","text":"Returns the size of each workgroup in invocations
Syntax:
operation ::= `iree_input.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n
The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.
Workgroup sizes are not determined at the iree dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.
Corresponds to the WorkgroupSize
SPIR-V built-in and the blockDim
CUDA built-in variable, only in the iree dialect the number of dimensions is not restricted to 3 (XYZ).
%x = iree_input.dispatch.workgroup.size[0] : index\n%y = iree_input.dispatch.workgroup.size[1] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_27","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"descriptor set binding specification
Syntax:
#iree_input.descriptor_set.binding<\n int64_t, # ordinal\n DescriptorType, # type\n std::optional<DescriptorFlags> # flags\n>\n
"},{"location":"reference/mlir-dialects/IREEInput/#parameters","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
type DescriptorType
flags std::optional<DescriptorFlags>
"},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"descriptor set layout specification
Syntax:
#iree_input.descriptor_set.layout<\n int64_t, # ordinal\n ::llvm::ArrayRef<DescriptorSetBindingAttr>, # bindings\n std::optional<DescriptorSetLayoutFlags> # flags\n>\n
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
bindings ::llvm::ArrayRef<DescriptorSetBindingAttr>
flags std::optional<DescriptorSetLayoutFlags>
"},{"location":"reference/mlir-dialects/IREEInput/#descriptortypeattr","title":"DescriptorTypeAttr","text":"valid DescriptorType
Syntax:
#iree_input.descriptor_type<\n ::mlir::iree_compiler::IREE::Input::DescriptorType # value\n>\n
Enum cases: * uniform_buffer (UniformBuffer
) * storage_buffer (StorageBuffer
)
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::Input::DescriptorType
an enum of type DescriptorType"},{"location":"reference/mlir-dialects/IREEInput/#devicetargetattr","title":"DeviceTargetAttr","text":"generic device target specification
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description deviceID StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectattr","title":"ExecutableObjectAttr","text":"executable object reference
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description path StringAttr
data DenseIntElementsAttr
"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"target-specific object file references
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description targets ArrayAttr
targetObjects ArrayAttr
"},{"location":"reference/mlir-dialects/IREEInput/#executabletargetattr","title":"ExecutableTargetAttr","text":"generic executable target specification
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description backend StringAttr
format StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/IREEInput/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"executable entry point layout specification
Syntax:
#iree_input.pipeline.layout<\n int64_t, # pushConstants\n ::llvm::ArrayRef<DescriptorSetLayoutAttr> # setLayouts\n>\n
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description pushConstants int64_t
setLayouts ::llvm::ArrayRef<DescriptorSetLayoutAttr>
"},{"location":"reference/mlir-dialects/IREEInput/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#list","title":"list","text":"A mutable, resizable list of some type.
"},{"location":"reference/mlir-dialects/IREEInput/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffertype","title":"BufferType","text":"Buffer is an untyped bag of bits with no shape or dtype
Syntax: !iree_input.buffer
Buffers represent an untyped bag of bits that can be reinterpreted depending on a use case using buffer_view
operation. Buffers can be used for packing multiple tensors into the same underlying storage. It is left to higher level code to decide how exactly tensors are laid out in the buffer.
"},{"location":"reference/mlir-dialects/IREEInput/#bufferviewtype","title":"BufferViewType","text":"View into a buffer, with runtime shape and element type
Syntax: !iree_input.buffer_view
BufferViews represent views onto backing IREE runtime Buffer objects, adding runtime shape and element type parameters to the backing buffer. BufferViews are typically accepted and returned at boundaries with external code.
In the runtime and lower level compiler, BufferView's are fully modeled; however, as boundary types, not all features are exposed publicly. Since within compiled tensor programs, it is typical to operate in terms of fully typed tensors, the primary mechanism for getting or using a BufferView at the high level is by casting to/from a tensor. It is left to higher level code to ensure that aliasing rules are enforced at such boundaries.
"},{"location":"reference/mlir-dialects/IREEInput/#bytebuffertype","title":"ByteBufferType","text":"a reference counted byte buffer
Syntax: !iree_input.byte_buffer
A reference counted byte buffer that models a pointer, offset, and length.
"},{"location":"reference/mlir-dialects/IREEInput/#listtype","title":"ListType","text":"A one dimensional list of runtime values
Represents a list of arbitrary type. Primitive types can be expected to be efficiently stored in an unboxed form. Reference types and variants are permitted.
Lists can either be homogeneous, with a fixed element type, or heterogeneous by parameterizing them with a VariantType.
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description elementType ::mlir::Type
A type suitable as an element type of a container"},{"location":"reference/mlir-dialects/IREEInput/#ptrtype","title":"PtrType","text":"Pointer to a concrete type
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description targetType ::mlir::Type
A type suitable as a target type of a pointer"},{"location":"reference/mlir-dialects/IREEInput/#varianttype","title":"VariantType","text":"Represents any legal or reference type in the IREE runtime
Syntax: !iree_input.variant
The variant type is typically used to parameterize container types that can contain any legal primitive, reference or null in the IREE type system.
"},{"location":"reference/mlir-dialects/IREELinalgExt/","title":"IREELinalgExt","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_ext-dialect","title":"'iree_linalg_ext' Dialect","text":"IREE Linalg Extensions.
A dialect designed for experimenting with non-structured operations that cannot be represented efficiently/directly by the Linalg dialect.
- 'iree_linalg_ext' Dialect
- Operation definition
- Data tiling ops
- iree_linalg_ext.pack (LinalgExt::PackOp)
- iree_linalg_ext.set_encoding (LinalgExt::SetEncodingOp)
- iree_linalg_ext.unpack (LinalgExt::UnPackOp)
- iree_linalg_ext.unset_encoding (LinalgExt::UnsetEncodingOp)
- iree_linalg_ext.upper_bound_tile_size (LinalgExt::UpperBoundTileSizeOp)
- Non-structured ops
- iree_linalg_ext.attention (LinalgExt::AttentionOp)
- iree_linalg_ext.fft (LinalgExt::FftOp)
- iree_linalg_ext.reverse (LinalgExt::ReverseOp)
- iree_linalg_ext.scan (LinalgExt::ScanOp)
- iree_linalg_ext.scatter (LinalgExt::ScatterOp)
- iree_linalg_ext.sort (LinalgExt::SortOp)
- iree_linalg_ext.topk (LinalgExt::TopkOp)
- Utility ops
- iree_linalg_ext.transform.do_not_dce_operands (LinalgExt::DoNotDCEOperandsOp)
- iree_linalg_ext.yield (LinalgExt::YieldOp)
- Winograd ops
- iree_linalg_ext.winograd.input_transform (LinalgExt::WinogradInputTransformOp)
- iree_linalg_ext.winograd.output_transform (LinalgExt::WinogradOutputTransformOp)
- Attribute definition
- EncodingAttr
- EncodingRoleAttr
- EncodingUserAttr
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#data-tiling-ops","title":"Data tiling ops","text":"Operations for working with data layouts, padding, encodings, and other properties useful for tiling computations across iteration space dimensions.
"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extpack-linalgextpackop","title":"iree_linalg_ext.pack
(LinalgExt::PackOp)","text":"Pack operation
Syntax:
operation ::= `iree_linalg_ext.pack` attr-dict\n $inputs\n (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?\n (`outer_dims_perm` `=` $outer_dims_perm^)?\n `inner_dims_pos` `=` $inner_dims_pos\n `inner_tiles` `=`\n custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)\n `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n (`->` type($results)^)?\n
The pack operation converts an input
into a tiled and packed layout. The dimensions to be tiled are obtained from inner_dims_pos
and the size of the tile is obtained from inner_tiles
. The dimensions listed in inner_dims_pos
do not need to be contiguous in which case the tile will get transposed. We handle only full tiles if padding_value
is not set; it is UB if the tile does not perfectly divide the dimension. If padding_value
is set, it will pad along high dimensions, i.e., it pads at the bottom and on the right if the input has rank 2, and the result type shape, will be dynamic in any dimension if and only if the input shape is. As optional input, the operation takes outer_dims_perm
that allows to permute the tiled loops.
Example KC_to_KCck:
iree_linalg_ext.pack %arg0 inner_dims_pos = [1, 0]\n inner_tiles = [32, 8] into %arg1 : (memref<128x256xf32> memref<16x8x32x8xf32>)\n
Example NC_to_NCnc:
iree_linalg_ext.pack %arg0 inner_dims_pos = [0, 1]\n inner_tiles = [8, 32] into %arg1 : (memref<128x256xf32> memref<16x8x8x32xf32>)\n
Example KC_to_CKkc iree_linalg_ext.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n inner_tiles = [32, 8] into %arg1 : (memref<128x256xf32> memref<32x4x32x8xf32>)\n
In all cases, dimension at position 0 in the input memref (128) is tiled with a factor of 8, while dimension at position 1 (256) is tiled with a factor of 32. In the KC_to_KCck example, the point loops are interchanged, while in the KC_to_CKkc example the tiled loops.
Example NC_to_NCnc with padding:
iree_linalg_ext.pack %arg padding_value(%pad : f32) inner_dims_pos = [0, 1]\n inner_tiles = [8, 2] into %arg1 : (memref<13x15xf32> memref<2x8x8x2xf32>)\n
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription outer_dims_perm
::mlir::DenseI64ArrayAttri64 dense array attribute inner_dims_pos
::mlir::DenseI64ArrayAttri64 dense array attribute static_inner_tiles
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values inner_tiles
variadic of index padding_value
any type"},{"location":"reference/mlir-dialects/IREELinalgExt/#results","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extset_encoding-linalgextsetencodingop","title":"iree_linalg_ext.set_encoding
(LinalgExt::SetEncodingOp)","text":"Perform pack and pad operation on source
Syntax:
operation ::= `iree_linalg_ext.set_encoding` attr-dict $source `:` type($source) `->` type($result)\n
Operation to assign an encoding to a tensor. The operation does not change the rank or extent of a tensor. Instead it adds an encoding attribute to the tensor type to represent a change in layout.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_1","title":"Operands:","text":"Operand Description source
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_1","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extunpack-linalgextunpackop","title":"iree_linalg_ext.unpack
(LinalgExt::UnPackOp)","text":"Unpack operation
Syntax:
operation ::= `iree_linalg_ext.unpack` attr-dict\n $inputs\n (`outer_dims_perm` `=` $outer_dims_perm^)?\n `inner_dims_pos` `=` $inner_dims_pos\n `inner_tiles` `=`\n custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)\n `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n (`->` type($results)^)?\n
The unpack operation converts a tiled and packed input to an unpacked output. See pack
for more details on inner_tiles
and dims_pos
; it is UB if the tile does not perfectly divide the dimension. Optionally, the operation also supports permuting the tiled loops.
Example KCck_to_KC:
iree_linalg_ext.unpack %arg0 dims_pos = [1, 0]\n inner_tiles = [32, 8] into %arg1 : (memref<16x8x32x8xf32> memref<128x256xf32>)\n
Example NCnc_to_NC:
iree_linalg_ext.unpack %arg0 dims_pos = [0, 1]\n inner_tiles = [8, 32] into %arg1 : (memref<16x8x8x32xf32> memref<128x256xf32>)\n
Example CKkc_to_KC:
iree_linalg_ext.unpack %arg1 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n inner_tiles = [32, 8] into %arg0 : (memref<32x4x32x8xf32> memref<128x256xf32>)\n
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription outer_dims_perm
::mlir::DenseI64ArrayAttri64 dense array attribute inner_dims_pos
::mlir::DenseI64ArrayAttri64 dense array attribute static_inner_tiles
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_2","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values inner_tiles
variadic of index"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_2","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extunset_encoding-linalgextunsetencodingop","title":"iree_linalg_ext.unset_encoding
(LinalgExt::UnsetEncodingOp)","text":"Perform unpack and extract operation on source
Syntax:
operation ::= `iree_linalg_ext.unset_encoding` attr-dict $source `:` type($source) `->` type($result)\n
Operation to convert a tensor with encoding that represents its data layout into a tensor with default layout (i.e. no encoding). For now in IREE the default layout is row-major.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_3","title":"Operands:","text":"Operand Description source
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_3","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extupper_bound_tile_size-linalgextupperboundtilesizeop","title":"iree_linalg_ext.upper_bound_tile_size
(LinalgExt::UpperBoundTileSizeOp)","text":"Returns an upper bound on tile sizes
Syntax:
operation ::= `iree_linalg_ext.upper_bound_tile_size` attr-dict $tensorType `->` type($results)\n
This returns the largest tile sizes that might result from materialization of the given encoding. This can be used outside of target-specific code, so there may be multiple targets, and this will return the maximum tile size from iterating over all of them. The evaluation happens in the MaterializeUpperBoundTileSize pass.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription tensorType
::mlir::TypeAttrtype attribute of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_4","title":"Results:","text":"Result Description results
variadic of index"},{"location":"reference/mlir-dialects/IREELinalgExt/#non-structured-ops","title":"Non-structured ops","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extattention-linalgextattentionop","title":"iree_linalg_ext.attention
(LinalgExt::AttentionOp)","text":"Attention operator
Syntax:
operation ::= `iree_linalg_ext.attention` attr-dict\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n (`->` type($results)^)?\n
This operator takes in 3 tensors: query(Q), key(K) and value(V) and computes the attention. For self-attention, all inputs have the same shape BxNxd where B is the batch dimension, N is the sequence length and d is head dimension. Typically N >>> d. Mathematically, the attention is defined as matmul(softmax(matmul(Q, transpose(K))), V) and has shape BxNxd. Usually, this operator also performs scaling, masking and dropout, but we leave that out of the current implementation. For cross-attention, the query and output have the same shape (BxNxd), while the key and value differ in sequence length (they have shape BxLxd, where L != N). This operator after tiling results in a tiled result as per flash attention and results in the current max
and sum
statistics while processing the current tile.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_4","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_5","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extfft-linalgextfftop","title":"iree_linalg_ext.fft
(LinalgExt::FftOp)","text":"Fft operator
Syntax:
operation ::= `iree_linalg_ext.fft` attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n `outs` `(` $outputs `:` type($outputs) `)`\n (`:` type($results)^)?\n
Apply 1D FFT to innermost dim. This is an iterative FFT, not recursive. Thus, the bit reversal is assumed applied on the input. The op carries an input -- stage, which indicates the level of reduction loop in the algorithm. It represents the computation body. For more details, see \"Data reordering, bit reversal, and in-place algorithms\" section in https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm
The size of innermost dim is expected to be a power of 2.
It is optional to carry coefficient tensors/buffers as inputs. In this context, they will be the second and third inputs.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_5","title":"Operands:","text":"Operand Description inputs
variadic of any type outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_6","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extreverse-linalgextreverseop","title":"iree_linalg_ext.reverse
(LinalgExt::ReverseOp)","text":"Reverse operator
Syntax:
operation ::= `iree_linalg_ext.reverse` attr-dict `dimensions` `(` $dimensions `)`\n (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n (`outs` `(` $outputs^ `:` type($outputs) `)`)?\n (`:` type($results)^)?\n
A temporary solution for lowering reverse ops into IREE, allowing IREE to tile and distribute them. }
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription dimensions
::mlir::DenseIntElementsAttr64-bit signless integer elements attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_6","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_7","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extscan-linalgextscanop","title":"iree_linalg_ext.scan
(LinalgExt::ScanOp)","text":"Scan operator
Syntax:
operation ::= `iree_linalg_ext.scan` attr-dict\n `dimension` `(` $dimension `)`\n `inclusive` `(` $inclusive `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
Computes the inclusive/exclusive scan along a given dimension.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttr64-bit signless integer attribute inclusive
::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_7","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_8","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extscatter-linalgextscatterop","title":"iree_linalg_ext.scatter
(LinalgExt::ScatterOp)","text":"Scatter operator
Syntax:
operation ::= `iree_linalg_ext.scatter` attr-dict `dimension_map` `=` $dimension_map\n `unique_indices` `(` $unique_indices `)`\n (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
Based on XLA operation semantics, takes two inputs
(update
and indices
) and outputs
value (original
). The operation updates the value at the slices specified by indices
by combining the current value with the value in updates
using the computation specified in region
. The region
specifies a binary operation of signature (T, T) -> T, where T
is the element-type of updates
(and original
). The first argument correspond the value to be updated (i.e. from updates
), and the second the current value (i.e. value from original
).
The indices
is a 2D tensor/memref type. The first dim is the number of updates, and the second dim is index depth. The index depth should always be static.
The first dim of updates
and indices
is identical, since they represent the number of updates.
The rank of the original
/result
is at least index_depth + rank(%updates) - 1
. The first index_depth
indices are derived from indices
and the shape of update value has the last rank(%original) - index_depth values match %(originals) last dimensions, with the previous dims extending from the index offsets.
The dimension_map attribute describes which index value maps to which dimension in the destination. It cannot contain duplicate values, must have as many entries as index depth, and values must be within the rank of the destination.
The unique_indices attribute carries the information whether all the indices are unique. If there are repeated indices, the first iteration loop will be marked as reduction.
The shapes definition follows tensorflow operations except that it forces batch dims to be 1D. See more information in https://www.tensorflow.org/api_docs/python/tf/tensor_scatter_nd_update
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension_map
::mlir::DenseI64ArrayAttri64 dense array attribute unique_indices
::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_8","title":"Operands:","text":"Operand Description inputs
variadic of ranked tensor or memref of any type values outputs
variadic of ranked tensor or memref of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_9","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extsort-linalgextsortop","title":"iree_linalg_ext.sort
(LinalgExt::SortOp)","text":"Sort operator
Syntax:
operation ::= `iree_linalg_ext.sort` attr-dict\n `dimension` `(` $dimension `)`\n (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
Based on XLA operation semantics, sorts the given operands
at the given dimension
with the given comparator
.
See https://www.tensorflow.org/xla/operation_semantics#sort.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_9","title":"Operands:","text":"Operand Description inputs
variadic of any type outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_10","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_exttopk-linalgexttopkop","title":"iree_linalg_ext.topk
(LinalgExt::TopkOp)","text":"Top-K operator
Syntax:
operation ::= `iree_linalg_ext.topk` attr-dict\n `dimension` `(` $dimension `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
A Top-K operation for N-D tensors. Reduces the target dimension from the input size N down to K elements based on the supplied binary region.
Accepts an N-D tensor input consisting of values and an optional N-D tensor for indices of those values (i32 type). If input indices aren't provided, the index mapping is inferred based on the k dim. Both input values/indices tensors and output values/indices tensors must have the same shape. Top-K is computed along the target dimension (from dimension()). Returns two output tensors of values and the indices of Top-K results. The output dimensions must match the input save for the dimension that is reduced to K results.
Region accepts lhs=[next N input] and rhs=[exiting K output] and yields an i1. If true, the two values are swapped: - For Top-K comparison: > - For Min-K comparison: < Note: when the two values are equal, the first occurrence is always selected.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_10","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_11","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_exttransformdo_not_dce_operands-linalgextdonotdceoperandsop","title":"iree_linalg_ext.transform.do_not_dce_operands
(LinalgExt::DoNotDCEOperandsOp)","text":"Unfoldable op that just keeps its operands live
Syntax:
operation ::= `iree_linalg_ext.transform.do_not_dce_operands` attr-dict $operands `:` type($operands)\n
Unfoldable op that just keeps its operands live. This is to use with the transform dialect in case where transforms introduce IR that would be otherwise DCE'd by canonicalizations.
This op should be added to the transform dialect in the fullness of time but it can't be registered dynamically on the IREE side as that triggers errors since the op does not implement any transform interface.
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_11","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extyield-linalgextyieldop","title":"iree_linalg_ext.yield
(LinalgExt::YieldOp)","text":"LinalgExt yield op
Syntax:
operation ::= `iree_linalg_ext.yield` attr-dict ($operands^ `:` type($operands))?\n
iree_linalg_ext.yield
is a special terminator operation for blocks inside regions in iree_linalg_ext
ops.
Traits: AlwaysSpeculatableImplTrait, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_12","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/IREELinalgExt/#winograd-ops","title":"Winograd ops","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extwinogradinput_transform-linalgextwinogradinputtransformop","title":"iree_linalg_ext.winograd.input_transform
(LinalgExt::WinogradInputTransformOp)","text":"Winograd Input Transform operator
Syntax:
operation ::= `iree_linalg_ext.winograd.input_transform` attr-dict\n `output_tile_size` `(` $output_tile_size `)`\n `kernel_size` `(` $kernel_size `)`\n `image_dimensions` `(` $image_dimensions `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n (`->` type($result)^)?\n
This operator is the first step in converting a convolution to its Winograd equivalent. Given a tile of an input image (I), this operator computes matmul(transpose(B), matmul(I, B)). The input tile is assumed to be square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. B is a constant 2-d square matrix of the same shape as the input tile I. The input to the operator is an image of shape (N, H, W, C) or (N, C, H, W) and the output is an operator of shape (m + r - 1, m + r - 1, N, H', W', C) where H' = ceil((H - m + 1)/r) and W' = ceil((W - m + 1)/r). The result of this operator is first collapsed and then fed to a batch matmul op.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription output_tile_size
::mlir::IntegerAttr64-bit signless integer attribute kernel_size
::mlir::IntegerAttr64-bit signless integer attribute image_dimensions
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_13","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_12","title":"Results:","text":"Result Description result
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extwinogradoutput_transform-linalgextwinogradoutputtransformop","title":"iree_linalg_ext.winograd.output_transform
(LinalgExt::WinogradOutputTransformOp)","text":"Winograd Output Transform operator
Syntax:
operation ::= `iree_linalg_ext.winograd.output_transform` attr-dict\n `output_tile_size` `(` $output_tile_size `)`\n `kernel_size` `(` $kernel_size `)`\n `image_dimensions` `(` $image_dimensions `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n (`->` type($result)^)?\n
This operator is the last transform in converting a convolution to its Winograd equivalent. After convolution in the Winograd domain (which turns into an elementwise product for a single channel and batch matrix multiplication for many channels), this operator converts the output back into the original domain. Given a tile of the output (O) in the Winograd domain, this operator computes matmul(transpose(A), matmul(O, A)). The output tile is square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. A is a constant 2-d matrix of shape (m + r - 1) x r. The input to the operator is a tensor of shape (m + r - 1, m + r - 1, N, H', W', C) and the output is a tensor of shape (N, H, W, C) or (N, C, H, W) where H = r H' and W = r W'. This operator is followed by a tensor.extract_slice which extracts only the non-padded part of the output.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription output_tile_size
::mlir::IntegerAttr64-bit signless integer attribute kernel_size
::mlir::IntegerAttr64-bit signless integer attribute image_dimensions
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_14","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_13","title":"Results:","text":"Result Description result
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#encodingattr","title":"EncodingAttr","text":"information to decide how to data-tile a tensor
Syntax:
#iree_linalg_ext.encoding<\n EncodingUserAttr, # user\n EncodingRoleAttr, # role\n ArrayAttr, # element_types\n TypeAttr, # original_type\n IntegerAttr, # matmul_narrow_M\n IntegerAttr # matmul_narrow_N\n>\n
This attribute describes the change in the layout for a given tensor to execute subsequent operations on the tiled layout. The encoding serves as a way to represent the change in the way the data is laid out in memory without changing the logical rank/extent of the tensor itself. When required, the encoding can be used to explicitly manifest the layout change through operations like pack/unpack.
"},{"location":"reference/mlir-dialects/IREELinalgExt/#parameters","title":"Parameters:","text":"Parameter C++ type Description user EncodingUserAttr
kind of operation using this tensor role EncodingRoleAttr
role of this tensor as an operand element_types ArrayAttr
element types of the user's operands original_type TypeAttr
type of the original tensor type before padding matmul_narrow_M IntegerAttr
optional M narrow dimension size (only for MATMUL and BATCH_MATMUL users) matmul_narrow_N IntegerAttr
optional N narrow dimension size (only for MATMUL and BATCH_MATMUL users)"},{"location":"reference/mlir-dialects/IREELinalgExt/#encodingroleattr","title":"EncodingRoleAttr","text":"Describes the role of the tensor as an operand or a result of an operation.
Syntax:
#iree_linalg_ext.role<\n ::mlir::iree_compiler::IREE::LinalgExt::EncodingRole # value\n>\n
Enum cases: * LHS (LHS
) * RHS (RHS
) * RESULT (RESULT
)
"},{"location":"reference/mlir-dialects/IREELinalgExt/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::LinalgExt::EncodingRole
an enum of type EncodingRole"},{"location":"reference/mlir-dialects/IREELinalgExt/#encodinguserattr","title":"EncodingUserAttr","text":"Describes the operation that a tensor is an operand or a result of.
Syntax:
#iree_linalg_ext.user<\n ::mlir::iree_compiler::IREE::LinalgExt::EncodingUser # value\n>\n
Enum cases: * MATMUL (MATMUL
) * BATCH_MATMUL (BATCH_MATMUL
)
"},{"location":"reference/mlir-dialects/IREELinalgExt/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::LinalgExt::EncodingUser
an enum of type EncodingUser"},{"location":"reference/mlir-dialects/IREEVectorExt/","title":"IREEVectorExt","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_ext-dialect","title":"'iree_vector_ext' Dialect","text":"IREE Vector Extensions.
A dialect designed for experimenting with vector operations beyond what is currently available in the Vector Dialect.
- 'iree_vector_ext' Dialect
- Operation definition
- iree_vector_ext.layout_conflict_resolution (VectorExt::LayoutConflictResolutionOp)
- Attribute definition
- LayoutAttr
- PerDimLayoutAttr
"},{"location":"reference/mlir-dialects/IREEVectorExt/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extlayout_conflict_resolution-vectorextlayoutconflictresolutionop","title":"iree_vector_ext.layout_conflict_resolution
(VectorExt::LayoutConflictResolutionOp)","text":"Layout Conflict Resolution operator
Syntax:
operation ::= `iree_vector_ext.layout_conflict_resolution` $input attr-dict `:` type($input) `->` type($output)\n
The layout conflict resolution operator takes a vector and a desired layout and transforms the vector to one with the desired layout.
"},{"location":"reference/mlir-dialects/IREEVectorExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription sourceLayout
::mlir::iree_compiler::IREE::VectorExt::LayoutAttrhigh-dimensional vector register layout for a given vector desiredLayout
::mlir::iree_compiler::IREE::VectorExt::LayoutAttrhigh-dimensional vector register layout for a given vector"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands","title":"Operands:","text":"Operand Description input
vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results","title":"Results:","text":"Result Description output
vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#layoutattr","title":"LayoutAttr","text":"high-dimensional vector register layout for a given vector
This contains a complete specification of the layout for a given vector, whereas the attribute above only specifies the per dimension layout.
"},{"location":"reference/mlir-dialects/IREEVectorExt/#parameters","title":"Parameters:","text":"Parameter C++ type Description layouts ::llvm::ArrayRef<PerDimLayoutAttr>
layout for each dimension of the vector"},{"location":"reference/mlir-dialects/IREEVectorExt/#perdimlayoutattr","title":"PerDimLayoutAttr","text":"high-dimensional vector register layout for a given vector dimension
This attribute describes the per dimension register layout for a given vector that could be prescribed by an operator such as matrix multiplication. This is a way to explicitly represent the layout in the IR when it is in the SIMD form prior to converting to the SIMT form so that we can reason about layouts, propagating layouts and layout conflicts.
"},{"location":"reference/mlir-dialects/IREEVectorExt/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description labels ::llvm::ArrayRef<std::string>
labels for the high dimensional layout dims shapes ::llvm::ArrayRef<int64_t>
shapes for the high dimensional layout dims"},{"location":"reference/mlir-dialects/Stream/","title":"Stream","text":""},{"location":"reference/mlir-dialects/Stream/#stream-dialect","title":"'stream' Dialect","text":"A dialect designed to model execution partitioning and scheduling.
The stream dialect is designed to take tensor programs and convert them to explicitly scheduled asynchronous programs. This includes placing ops on specific targets, partitioning the work between the targets, scheduling the work for concurrency, and encoding tensors into target-specific resources.
+--------+ +----------+ +-------+\n| flow.* | -> | stream.* | -> | hal.* |\n+--------+ +----------+ +-------+\n
This sits in-between the flow
and hal
dialects.
-
flow
models tensor programs by separating work into dispatchable functions in order to isolate the main host program data flow and the dense tensor compute operations.
-
stream
models explicitly scheduled asynchronous programs by partitioning the dispatchable work, specifying target affinities, encoding tensors into target-specific forms, and scheduling the work to run concurrently.
-
hal
models a low-level hardware abstraction layer used to manage buffers and issue asynchronous work across a variety of device types. The dialect is largely 1:1 with the IREE HAL C API.
Transforms in the dialect lower tensor values into opaque resources with the goal of ensuring no tensors survive in the IR. At entry stream.tensor.*
ops are used to capture the source tensor encoding information (data type, shapes, etc) and then lowered into stream.async.*
ops that model the asynchronous workloads on the opaque resources. The asynchronous operations are then partitioned, allocated, and scheduled for execution using the stream.cmd.*
ops.
It's intended that after transformation through the stream dialect the program is ready for execution on an abstract machine. At this level of representation buffers have still not been allocated and devices are not yet resolved, however the information captured in the stream
IR allows such operations to be done trivially. To this end all ops carry the symbolic size of the resources on which they operate as well as the lifetime of the resources they are acting upon. This manifests in the usage of the !stream.resource
type:
// Unresolved lifetime (resolved during the iree-stream-refine-usage pass):\n!stream.resource<*>\n// An externally managed value (passed in via the program API).\n!stream.resource<external>\n// A staging buffer for uploads/downloads.\n!stream.resource<staging>\n// A short-lived value that is used across streams.\n!stream.resource<transient>\n// A long-lived value that persists across streams in globals.\n!stream.resource<variable>\n// An immutable value that persists for the duration of the program.\n!stream.resource<constant>\n
Operations using resources carry the size of all operand and result resources:
// %update (40 bytes) is being inserted into %target (296 bytes).\n// Can be dynamic values such as those originating from dynamic dimensions.\n%13 = stream.async.update %update, %target[%c256 to %c296] :\n !stream.resource<transient>{%c40} ->\n %target as !stream.resource<transient>{%c296}\n
Once all stream.async.*
work is moved into executable regions (such as stream.async.execute
) !stream.timepoint
values are used to sequence the execution. These timepoints represent some point in time where all execution up to that timepoint has completed and any results that were produced by the execution are available for use. Attempting to use the resources before their corresponding timepoint has been reached will lead to undefined behavior. The benefit of this is that after timepoints are established in the IR it's possible to induce aliasing of resources without breaking execution correctness.
- 'stream' Dialect
- Operation definition
- Async control flow ops
- stream.async.call (Stream::AsyncCallOp)
- stream.async.concurrent (Stream::AsyncConcurrentOp)
- stream.async.execute (Stream::AsyncExecuteOp)
- stream.async.func (Stream::AsyncFuncOp)
- Channel ops
- stream.channel.count (Stream::ChannelCountOp)
- stream.channel.create (Stream::ChannelCreateOp)
- stream.channel.rank (Stream::ChannelRankOp)
- stream.channel.split (Stream::ChannelSplitOp)
- Executable ops
- stream.binding.subspan (Stream::BindingSubspanOp)
- stream.executable.end (Stream::ExecutableEndOp)
- stream.executable.export (Stream::ExecutableExportOp)
- stream.executable (Stream::ExecutableOp)
- Execution context ops
- stream.context.resolve (Stream::ContextResolveOp)
- Explicit command ops
- stream.cmd.call (Stream::CmdCallOp)
- stream.cmd.collective (Stream::CmdCollectiveOp)
- stream.cmd.concurrent (Stream::CmdConcurrentOp)
- stream.cmd.copy (Stream::CmdCopyOp)
- stream.cmd.discard (Stream::CmdDiscardOp)
- stream.cmd.dispatch (Stream::CmdDispatchOp)
- stream.cmd.execute (Stream::CmdExecuteOp)
- stream.cmd.fill (Stream::CmdFillOp)
- stream.cmd.flush (Stream::CmdFlushOp)
- stream.cmd.func (Stream::CmdFuncOp)
- stream.cmd.invalidate (Stream::CmdInvalidateOp)
- stream.cmd.serial (Stream::CmdSerialOp)
- File ops
- stream.file.constant (Stream::FileConstantOp)
- stream.file.read (Stream::FileReadOp)
- stream.file.write (Stream::FileWriteOp)
- Miscellaneous ops
- stream.return (Stream::ReturnOp)
- stream.yield (Stream::YieldOp)
- Pseudo Ops
- stream.tensor.export (Stream::TensorExportOp)
- stream.tensor.import (Stream::TensorImportOp)
- Resource ops
- stream.resource.alloc (Stream::ResourceAllocOp)
- stream.resource.alloca (Stream::ResourceAllocaOp)
- stream.resource.constants (Stream::ResourceConstantsOp)
- stream.resource.dealloca (Stream::ResourceDeallocaOp)
- stream.resource.load (Stream::ResourceLoadOp)
- stream.resource.pack (Stream::ResourcePackOp)
- stream.resource.size (Stream::ResourceSizeOp)
- stream.resource.store (Stream::ResourceStoreOp)
- stream.resource.subview (Stream::ResourceSubviewOp)
- stream.resource.try_map (Stream::ResourceTryMapOp)
- Resource parameter I/O ops
- stream.parameter.gather (Stream::ParameterGatherOp)
- stream.parameter.load (Stream::ParameterLoadOp)
- stream.parameter.read (Stream::ParameterReadOp)
- stream.parameter.scatter (Stream::ParameterScatterOp)
- stream.parameter.write (Stream::ParameterWriteOp)
- Resource transfer ops
- stream.async.alloca (Stream::AsyncAllocaOp)
- stream.async.clone (Stream::AsyncCloneOp)
- stream.async.collective (Stream::AsyncCollectiveOp)
- stream.async.constant (Stream::AsyncConstantOp)
- stream.async.copy (Stream::AsyncCopyOp)
- stream.async.dispatch (Stream::AsyncDispatchOp)
- stream.async.fill (Stream::AsyncFillOp)
- stream.async.load (Stream::AsyncLoadOp)
- stream.async.slice (Stream::AsyncSliceOp)
- stream.async.splat (Stream::AsyncSplatOp)
- stream.async.store (Stream::AsyncStoreOp)
- stream.async.transfer (Stream::AsyncTransferOp)
- stream.async.update (Stream::AsyncUpdateOp)
- Synchronization ops
- stream.timepoint.await (Stream::TimepointAwaitOp)
- stream.timepoint.barrier (Stream::TimepointBarrierOp)
- stream.timepoint.chain_external (Stream::TimepointChainExternalOp)
- stream.timepoint.export (Stream::TimepointExportOp)
- stream.timepoint.immediate (Stream::TimepointImmediateOp)
- stream.timepoint.import (Stream::TimepointImportOp)
- stream.timepoint.join (Stream::TimepointJoinOp)
- Tensor ops
- stream.tensor.clone (Stream::TensorCloneOp)
- stream.tensor.constant (Stream::TensorConstantOp)
- stream.tensor.empty (Stream::TensorEmptyOp)
- stream.tensor.fill (Stream::TensorFillOp)
- stream.tensor.load (Stream::TensorLoadOp)
- stream.tensor.sizeof (Stream::TensorSizeOfOp)
- stream.tensor.slice (Stream::TensorSliceOp)
- stream.tensor.splat (Stream::TensorSplatOp)
- stream.tensor.store (Stream::TensorStoreOp)
- stream.tensor.trace (Stream::TensorTraceOp)
- stream.tensor.update (Stream::TensorUpdateOp)
- Attribute definition
- CollectiveAttr
- NamedParameterAttr
- PartitioningConfigAttr
- ResourceConfigAttr
- TimepointAttr
- Type constraint definition
- constant resource
- external resource
- staging resource
- transient resource
- resource
- variable resource
- Type definition
- BindingType
- ChannelType
- FileType
- ResourceType
- TimepointType
"},{"location":"reference/mlir-dialects/Stream/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Stream/#async-control-flow-ops","title":"Async control flow ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasynccall-streamasynccallop","title":"stream.async.call
(Stream::AsyncCallOp)","text":"Calls a streamable external host function
Syntax:
operation ::= `stream.async.call` (`on` `(` $affinity^ `)`)?\n $callee ``\n custom<DispatchOperands>($resource_operands,\n $resource_operand_offsets,\n $resource_operand_ends,\n $resource_operand_lengths) attr-dict `:`\n custom<ShapedFunctionType>(ref($resource_operands),\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands)\n
Calls a function taking/returning resource values with stream semantics. Asynchronous calls must have no side-effects.
Note that returned resources must have their sizes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the size (avoid at all costs!) a separate func.call can be used outside of the stream to do so. If sizes are unknowable until the operation is performed it should be made as a normal asynchronous host call with 'coarse-fences' instead.
Traits: AttrSizedOperandSegments, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, CallOpInterface, Stream_AffinityOp, Stream_StreamableOp, SymbolUserOpInterface, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
::mlir::FlatSymbolRefAttrflat symbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type or any type resource_operand_sizes
variadic of index resource_operand_offsets
variadic of index resource_operand_ends
variadic of index resource_operand_lengths
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamasyncconcurrent-streamasyncconcurrentop","title":"stream.async.concurrent
(Stream::AsyncConcurrentOp)","text":"Executes all ops concurrently
Syntax:
operation ::= `stream.async.concurrent` (`on` `(` $affinity^ `)`)?\n `with` ``\n custom<ResourceRegion>($resource_operands,\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands, $body)\n attr-dict-with-keyword\n
Represents a wave of work scheduled concurrently (each op executing at the same time). All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution.
Waves can be nested to create a DAG. For example, take the following graph:
|\n v---------+---------v\n+-------|-------+ +-------|-------+\n| v--+--v | | v--+--v |\n| +----+ +----+ | | +----+ +----+ |\n| | %a | | %b | | | | %c | | %d | |\n| +----+ +----+ | | +----+ +----+ |\n| +--v--+ | | +--v--+ |\n+-------|-------+ +-------|-------+\n +---------v---------+\n |\n
Represented with nested waves:
%0 = stream.async.concurrent with(%arg) -> ... {\n %1 = stream.async.concurrent with(%arg as %arg0) -> ... {\n %a = ...\n %b = ...\n stream.yield %a, %b\n }\n %2 = stream.async.concurrent with(%arg as %arg1) -> ... {\n %c = ...\n %d = ...\n stream.yield %c, %d\n }\n stream.yield %1, %2\n }\n
Traits: AttrSizedOperandSegments, HasParent, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ClosureOpInterface, RegionBranchOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_1","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_1","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncexecute-streamasyncexecuteop","title":"stream.async.execute
(Stream::AsyncExecuteOp)","text":"Executes a dependency-aware sequence of streamable ops
Syntax:
operation ::= `stream.async.execute` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `with` ``\n custom<ResourceRegion>($resource_operands,\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands, $body)\n `=` `` `>` type($result_timepoint)\n attr-dict-with-keyword\n
Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested stream.async.concurrent
ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.
Traits: AttrSizedOperandSegments, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ClosureOpInterface, RegionBranchOpInterface, Stream_AffinityOp, Stream_TimelineOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_2","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index result_sizes
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_2","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamasyncfunc-streamasyncfuncop","title":"stream.async.func
(Stream::AsyncFuncOp)","text":"Streamable function declaration
Syntax:
operation ::= `stream.async.func` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n ``\n custom<ShapedFunctionSignature>($function_type,\n $tied_operands,\n $arg_attrs,\n $res_attrs)\n attr-dict-with-keyword\n ($body^)?\n
Declares a function that can be called as an asynchronous streaming operation via stream.async.call
. Today only external functions are allowed.
Traits: IsolatedFromAbove, Stream_AsyncPhaseOp
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type tied_operands
::mlir::ArrayAttr64-bit integer array attribute sym_visibility
::mlir::StringAttrstring attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#channel-ops","title":"Channel ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamchannelcount-streamchannelcountop","title":"stream.channel.count
(Stream::ChannelCountOp)","text":"Returns the total number of participants in the group
Syntax:
operation ::= `stream.channel.count` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the total participant count in the collective communicator group.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_3","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_3","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Stream/#streamchannelcreate-streamchannelcreateop","title":"stream.channel.create
(Stream::ChannelCreateOp)","text":"Creates a new channel for collective communication
Syntax:
operation ::= `stream.channel.create` (`on` `(` $affinity^ `)`)?\n (`id` `(` $id^ `)`)?\n (`group` `(` $group^ `)`)?\n (`rank` `(` $rank^ `)`)?\n (`count` `(` $count^ `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Returns a new channel with the given rank associated with the specified affinity. Collective operations using this channel must only be submitted on compatible affinities.
The group and ID are optional and may be null. The rank and count can be omitted to indicate a default inherited from the environment or device configuration at runtime.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription group
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_4","title":"Operands:","text":"Operand Description id
a reference counted byte buffer rank
index count
index"},{"location":"reference/mlir-dialects/Stream/#results_4","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#streamchannelrank-streamchannelrankop","title":"stream.channel.rank
(Stream::ChannelRankOp)","text":"Returns the rank of the local participant in the group
Syntax:
operation ::= `stream.channel.rank` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the rank the channel represents as a participant in a collective group in [0, count)
.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_5","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_5","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Stream/#streamchannelsplit-streamchannelsplitop","title":"stream.channel.split
(Stream::ChannelSplitOp)","text":"Splits a collective communication channel
Syntax:
operation ::= `stream.channel.split` $channel `,` $color `,` $key\n `:` type($channel) `->` type($result)\n attr-dict-with-keyword\n
Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_6","title":"Operands:","text":"Operand Description channel
a collective communication channel color
index key
index"},{"location":"reference/mlir-dialects/Stream/#results_6","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#executable-ops","title":"Executable ops","text":""},{"location":"reference/mlir-dialects/Stream/#streambindingsubspan-streambindingsubspanop","title":"stream.binding.subspan
(Stream::BindingSubspanOp)","text":"Returns an alias to a subspan of interface binding data
Syntax:
operation ::= `stream.binding.subspan` $binding `` `[` $byte_offset `]`\n attr-dict `:` type($binding) `->` type($result) (`{` $dynamic_dims^ `}`)?\n
Returns a subview to a tensor or memref-like type from a binding. The same binding may have multiple subviews at different byte offsets.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_7","title":"Operands:","text":"Operand Description binding
a managed resource binding into an executable scope byte_offset
index dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_7","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Stream/#streamexecutableend-streamexecutableendop","title":"stream.executable.end
(Stream::ExecutableEndOp)","text":"Terminator pseudo-op for the executable op
Syntax:
operation ::= `stream.executable.end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/Stream/#streamexecutableexport-streamexecutableexportop","title":"stream.executable.export
(Stream::ExecutableExportOp)","text":"
Defines an executable entry point for dispatch operations
Syntax:
operation ::= `stream.executable.export` custom<SymbolVisibility>($sym_visibility)\n custom<SymbolAlias>($sym_name, $function_ref)\n custom<WorkgroupCountRegion>($workgroup_count)\n attr-dict-with-keyword\n
Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal function.
Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute function_ref
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Stream/#streamexecutable-streamexecutableop","title":"stream.executable
(Stream::ExecutableOp)","text":"Generic executable module
Syntax:
operation ::= `stream.executable` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
An executable module containing one or more public functions. The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Stream/#execution-context-ops","title":"Execution context ops","text":"Operations for interacting with the execution context that stream operations execute within.
"},{"location":"reference/mlir-dialects/Stream/#streamcontextresolve-streamcontextresolveop","title":"stream.context.resolve
(Stream::ContextResolveOp)","text":"Resolves low-level context resources based on type
Syntax:
operation ::= `stream.context.resolve` (`on` `(` $affinity^ `)`)?\n attr-dict `:` type($results)\n
WIP; allows for accessing the implementation details of lower-level dialects such as the HAL. This will likely be reworked in the future to either live inside other dialects, use some op interface instead of having a dedicated op here, or remove the op entirely and make resolution happen explicitly.
Examples:
// Returns a HAL device.\n= stream.context.resolve on(#something) : !hal.device\n// Returns a HAL device and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.device, i64\n// Returns a HAL allocator and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.allocator, i64\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#results_8","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#explicit-command-ops","title":"Explicit command ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamcmdcall-streamcmdcallop","title":"stream.cmd.call
(Stream::CmdCallOp)","text":"Calls a streamable external host function
Syntax:
operation ::= `stream.cmd.call` $callee ``\n custom<CmdCallOperands>($resource_operands,\n $resource_operand_offsets,\n $resource_operand_lengths,\n $resource_operand_accesses) attr-dict `:`\n custom<ShapedFunctionType>(ref($resource_operands),\n type($resource_operands),\n $resource_operand_sizes,\n type($results),\n $result_sizes,\n $tied_operands)\n
Calls a function operating on resource values with stream semantics. Asynchronous calls must have no side-effects.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: CallOpInterface, Stream_StreamableOp, Stream_SubviewEffectOp, SymbolUserOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
::mlir::FlatSymbolRefAttrflat symbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute resource_operand_accesses
::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_8","title":"Operands:","text":"Operand Description resource_operands
variadic of index or integer or floating-point or complex-type or resource or external resource or transient resource or variable resource or constant resource or any type resource_operand_sizes
variadic of index resource_operand_offsets
variadic of index resource_operand_lengths
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_9","title":"Results:","text":"Result Description results
variadic of index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamcmdcollective-streamcmdcollectiveop","title":"stream.cmd.collective
(Stream::CmdCollectiveOp)","text":"Dispatches a collective operation
Syntax:
operation ::= `stream.cmd.collective` `` $op `` `[` $element_count `]`\n `channel` `(` $channel `)`\n (`param` `(` $param^ `:` type($param) `)`)? `{`\n custom<DispatchResources>($resources, type($resources), $resource_sizes,\n $resource_offsets, $resource_lengths,\n $resource_accesses)\n `\\n` `}`\n attr-dict-with-keyword\n
Dispatches a collective operation specified against the device. If grouped with other collectives in a stream.cmd.concurrent
region the collective operations may fuse and execute more efficiently.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription op
::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification resource_accesses
::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_9","title":"Operands:","text":"Operand Description channel
a collective communication channel element_count
index param
32-bit signless integer resources
variadic of resource or external resource or transient resource or variable resource or constant resource resource_sizes
variadic of index resource_offsets
variadic of index resource_lengths
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdconcurrent-streamcmdconcurrentop","title":"stream.cmd.concurrent
(Stream::CmdConcurrentOp)","text":"Executes all ops concurrently
Syntax:
operation ::= `stream.cmd.concurrent` $body\n attr-dict-with-keyword\n
Represents a wave of work scheduled concurrently (each op executing at the same time).
Waves can be nested to create a DAG. For example, take the following graph:
|\n v---------+---------v\n+-------|-------+ +-------|-------+\n| v--+--v | | v--+--v |\n| +----+ +----+ | | +----+ +----+ |\n| | @a | | @b | | | | @c | | @d | |\n| +----+ +----+ | | +----+ +----+ |\n| +--v--+ | | +--v--+ |\n+-------|-------+ +-------|-------+\n +---------v---------+\n |\n
Represented with nested waves:
stream.cmd.concurrent {\n stream.cmd.concurrent {\n stream.cmd.dispatch @a\n stream.cmd.dispatch @b\n }\n stream.cmd.concurrent {\n stream.cmd.dispatch @c\n stream.cmd.dispatch @d\n }\n }\n
Traits: HasParent, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_CmdPhaseOp
Interfaces: RegionBranchOpInterface, Stream_StreamableOp
"},{"location":"reference/mlir-dialects/Stream/#streamcmdcopy-streamcmdcopyop","title":"stream.cmd.copy
(Stream::CmdCopyOp)","text":"Copies a subview of a stream resource to another
Syntax:
operation ::= `stream.cmd.copy` $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Copies a subview of a resource into a subview of another. As with memcpy this does not support overlapping updates into the same resource.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_10","title":"Operands:","text":"Operand Description source
any stream-compatible type source_size
index source_offset
index target
any stream-compatible type target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmddiscard-streamcmddiscardop","title":"stream.cmd.discard
(Stream::CmdDiscardOp)","text":"Discards a subview of a resource
Syntax:
operation ::= `stream.cmd.discard` $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Discards a subview of a resource, indicating that after this command the specified contents are no longer needed. This can be used to trim memory or invalidate caches.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_11","title":"Operands:","text":"Operand Description target
any stream-compatible type target_size
index target_offset
index target_length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmddispatch-streamcmddispatchop","title":"stream.cmd.dispatch
(Stream::CmdDispatchOp)","text":"Dispatches a parallelized grid of work
Syntax:
operation ::= `stream.cmd.dispatch` custom<DispatchEntryPoints>($entry_points)\n (`[` $workload^ `]`)? ``\n (`(` $uniform_operands^ `:` type($uniform_operands) `)`)? `{`\n custom<DispatchResources>($resources, type($resources), $resource_sizes,\n $resource_offsets, $resource_lengths,\n $resource_accesses)\n `\\n` `}`\n attr-dict-with-keyword\n
Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, SymbolUserOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_points
::mlir::ArrayAttrsymbol ref array attribute resource_accesses
::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_12","title":"Operands:","text":"Operand Description workload
variadic of index uniform_operands
variadic of index or integer or floating-point or complex-type resources
variadic of resource or external resource or transient resource or variable resource or constant resource resource_sizes
variadic of index resource_offsets
variadic of index resource_lengths
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdexecute-streamcmdexecuteop","title":"stream.cmd.execute
(Stream::CmdExecuteOp)","text":"Executes a dependency-aware sequence of streamable ops
Syntax:
operation ::= `stream.cmd.execute` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `with` ``\n custom<ExplicitResourceRegion>($resource_operands,\n type($resource_operands), $resource_operand_sizes,\n $body)\n `=` `` `>` type($result_timepoint)\n attr-dict-with-keyword\n
Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested stream.cmd.concurrent
ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.
Traits: AttrSizedOperandSegments, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_CmdPhaseOp
Interfaces: ClosureOpInterface, InferTypeOpInterface, RegionBranchOpInterface, Stream_AffinityOp, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_13","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_10","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamcmdfill-streamcmdfillop","title":"stream.cmd.fill
(Stream::CmdFillOp)","text":"Fills a subview of a stream resource with a value
Syntax:
operation ::= `stream.cmd.fill` $value `,`\n $target `[` $target_offset `for` $target_length `]` `:`\n type($value) `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Splats a value into a subview of the given stream resource and returns the resource with the update applied.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_14","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_length
index value
8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#streamcmdflush-streamcmdflushop","title":"stream.cmd.flush
(Stream::CmdFlushOp)","text":"Flushes a subview of a resource
Syntax:
operation ::= `stream.cmd.flush` (`to` `(` $source_affinity^ `)`)?\n $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Transfers a resource to an external target. The resource memory is made available to the target and can be made visible there using stream.cmd.invalidate
.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription source_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_15","title":"Operands:","text":"Operand Description target
any stream-compatible type target_size
index target_offset
index target_length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmdfunc-streamcmdfuncop","title":"stream.cmd.func
(Stream::CmdFuncOp)","text":"Streamable function declaration
Syntax:
operation ::= `stream.cmd.func` custom<SymbolVisibility>($sym_visibility)\n $sym_name ``\n custom<DispatchFunctionSignature>($function_type,\n $arg_attrs,\n $res_attrs)\n attr-dict-with-keyword\n ($body^)?\n
Declares a function that can be called as an asynchronous streaming operation via stream.cmd.call
. Today only external functions are allowed.
Traits: IsolatedFromAbove, Stream_CmdPhaseOp
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type sym_visibility
::mlir::StringAttrstring attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#streamcmdinvalidate-streamcmdinvalidateop","title":"stream.cmd.invalidate
(Stream::CmdInvalidateOp)","text":"Invalidates a subview of a resource
Syntax:
operation ::= `stream.cmd.invalidate` (`from` `(` $source_affinity^ `)`)?\n $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Transfers a resource from an external source into the current target. The resource memory is assumed to have been made available at the source via stream.cmd.flush
.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription source_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_16","title":"Operands:","text":"Operand Description target
any stream-compatible type target_size
index target_offset
index target_length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmdserial-streamcmdserialop","title":"stream.cmd.serial
(Stream::CmdSerialOp)","text":"Executes all ops serially (in-order)
Syntax:
operation ::= `stream.cmd.serial` $body\n attr-dict-with-keyword\n
Represents a sequence of work scheduled serially (each op executing one after the other).
Regions can be nested to create a DAG. For example, take the following graph:
|\n v---------+-----v\n+-------|-------+ +---|----+\n| v--+--v | | v |\n| +----+ +----+ | | +----+ |\n| | @a | | @b | | | | @c | |\n| +----+ +----+ | | +----+ |\n| | | | | | |\n| | | | | +-v--+ |\n| | | | | | @d | |\n| | | | | +----+ |\n| +--v--+ | | | |\n+-------|-------+ +---|----+\n +---------v-----+\n |\n
Represented with nested regions:
stream.cmd.concurrent {\n stream.cmd.concurrent {\n stream.cmd.dispatch @a\n stream.cmd.dispatch @b\n }\n stream.cmd.serial {\n stream.cmd.dispatch @c\n stream.cmd.dispatch @d\n }\n }\n
Traits: HasParent, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_CmdPhaseOp
Interfaces: RegionBranchOpInterface, Stream_StreamableOp
"},{"location":"reference/mlir-dialects/Stream/#file-ops","title":"File ops","text":"File ops.
"},{"location":"reference/mlir-dialects/Stream/#streamfileconstant-streamfileconstantop","title":"stream.file.constant
(Stream::FileConstantOp)","text":"Creates a file backed by the provided constant host memory
Syntax:
operation ::= `stream.file.constant` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `for` $source_length `]` `:`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Synchronously wraps a host heap buffer into a stream-accessible file handle. Changing the source buffer after definition has undefined behavior.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_17","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index source_length
index"},{"location":"reference/mlir-dialects/Stream/#results_11","title":"Results:","text":"Result Description result
a file handle used for I/O operations"},{"location":"reference/mlir-dialects/Stream/#streamfileread-streamfilereadop","title":"stream.file.read
(Stream::FileReadOp)","text":"Reads a segment of a file into a resource
Syntax:
operation ::= `stream.file.read` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`):(`:`)?\n $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `->`\n type($target) `` `{` $target_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously reads a segment of a file into a resource.
Some implementations can stream directly from the source file into device-local memory and file ops should be preferred to manually staging memory through host buffers.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_18","title":"Operands:","text":"Operand Description source
a file handle used for I/O operations source_offset
64-bit signless integer target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index length
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_12","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamfilewrite-streamfilewriteop","title":"stream.file.write
(Stream::FileWriteOp)","text":"Writes a segment of a file from a resource
Syntax:
operation ::= `stream.file.write` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`):(`:`)?\n $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n type($target)\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously writes a segment of a resource into a file. The file range must be valid within the file as this operation cannot grow the underlying file storage.
Some implementations can stream directly from device-local memory into the target file and file ops should be preferred to manually staging memory through host buffers.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_19","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index target
a file handle used for I/O operations target_offset
64-bit signless integer length
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_13","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#miscellaneous-ops","title":"Miscellaneous ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamreturn-streamreturnop","title":"stream.return
(Stream::ReturnOp)","text":"Returns results from a region
Syntax:
operation ::= `stream.return` attr-dict\n $operands `:` type($operands)\n
The values returned are copied by-value.
Traits: AlwaysSpeculatableImplTrait, HasParent, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_20","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamyield-streamyieldop","title":"stream.yield
(Stream::YieldOp)","text":"Yields stream values from an execution region
Syntax:
operation ::= `stream.yield` attr-dict\n ($resource_operands^ `:`\n custom<SizeAwareTypeList>(type($resource_operands),\n $resource_operand_sizes))?\n
The values returned represent the asynchronous value at the point in time the SSA value is defined (or tied).
Traits: AlwaysSpeculatableImplTrait, HasParent, SameVariadicOperandSize, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_21","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#pseudo-ops","title":"Pseudo Ops","text":"Pseudo ops for conversion support.
"},{"location":"reference/mlir-dialects/Stream/#streamtensorexport-streamtensorexportop","title":"stream.tensor.export
(Stream::TensorExportOp)","text":"Conversion placeholder for stream->other type conversion
Syntax:
operation ::= `stream.tensor.export` (`on` `(` $affinity^ `)`)?\n $source `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Defines a conversion to a higher-level dialect type such as tensor
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_22","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource or staging resource source_encoding_dims
variadic of index source_size
index"},{"location":"reference/mlir-dialects/Stream/#results_14","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Stream/#streamtensorimport-streamtensorimportop","title":"stream.tensor.import
(Stream::TensorImportOp)","text":"Conversion placeholder for other->stream type conversion
Syntax:
operation ::= `stream.tensor.import` (`on` `(` $affinity^ `)`)?\n $source `:`\n type($source)\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `{` $result_size `}`\n attr-dict-with-keyword\n
Defines a conversion from a higher-level dialect type such as tensor
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_23","title":"Operands:","text":"Operand Description source
any type result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_15","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#resource-ops","title":"Resource ops","text":"Generic resource ops.
"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloc-streamresourceallocop","title":"stream.resource.alloc
(Stream::ResourceAllocOp)","text":"Allocates a persistent resource
Syntax:
operation ::= `stream.resource.alloc` (`on` `(` $affinity^ `)`)?\n (`uninitialized` $uninitialized^)?\n attr-dict `:`\n type($result) `{` $storage_size `}`\n
Allocates a persistent value (one that is long-lived and possibly external to the program) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use discard
access.
Uninitialized allocations will have undefined contents and must only be used when all bytes are discarded prior to any reads. Runtimes decide what \"undefined contents\" means and here it only indicates that execution will be correct even if the memory starts with non-zero values.
If multiple values are allocated from the same operation it implies that they have matching lifetimes. When lowering to execution environments the separate allocations may be fused into one or more slab allocations in order to reduce overheads. How many allocations can be fused is based on the size of the individual resources and the target constraints (how large any single buffer may be, etc).
Traits: AlwaysSpeculatableImplTrait
Interfaces: AffinityOpInterface, ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription uninitialized
::mlir::UnitAttrunit attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_24","title":"Operands:","text":"Operand Description storage_size
index"},{"location":"reference/mlir-dialects/Stream/#results_16","title":"Results:","text":"Result Description result
any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloca-streamresourceallocaop","title":"stream.resource.alloca
(Stream::ResourceAllocaOp)","text":"Allocates a transient value with undefined contents
Syntax:
operation ::= `stream.resource.alloca` `uninitialized`\n (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`):(`:`)?\n attr-dict\n type($result) `{` $storage_size `}`\n `=` `` `>`\n type($result_timepoint)\n
Allocates a transient value (one that is short-lived and local to the current computation) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use discard
access.
The resource returned is not valid for use until the timepoint is reached; execution using this resource must await on the timepoint.
Traits: AlwaysSpeculatableImplTrait
Interfaces: AffinityOpInterface, ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_25","title":"Operands:","text":"Operand Description storage_size
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_17","title":"Results:","text":"Result Description result
any stream-compatible type result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceconstants-streamresourceconstantsop","title":"stream.resource.constants
(Stream::ResourceConstantsOp)","text":"Asynchronously uploads or maps constant values
Syntax:
operation ::= `stream.resource.constants` (`on` `(` $affinity^ `)`)?\n attr-dict `:`\n custom<ConstantValueList>(type($results),\n $result_sizes,\n $values)\n `\\n` ` ` ` ` `=` `` `>` type($result_timepoint)\n
Represents an upload of constant resources that may be packed, suballocated, and mapped depending on the final lowering target.
In runtime environments where memory is shared between host and device this turns into a mapping operation that avoids additional memory allocation and copies. When memory cannot be shared an asynchronous stream will be created to allocate and copy all of the constant values.
Though this op returns a unique resource for each constant value it's expected that almost all end up aliasing into the same storage. The exact packing and number of storage resources that are needed are not known until lowering to a particular backend, though, so they are separate here for proper usage tracking.
Both constant and variable resources can be produced; a constant is immutable while a variable will be treated as a constant-value initializer for a mutable resource. By modeling these together it's not required that variable initializers first be allocated, copied to the target, and then copied into the variable storage if the target is capable of doing a direct upload or mapping.
Traits: AlwaysSpeculatableImplTrait, SameVariadicResultSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription values
::mlir::ArrayAttrconstant value array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_26","title":"Operands:","text":"Operand Description result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_18","title":"Results:","text":"Result Description results
variadic of constant resource or variable resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourcedealloca-streamresourcedeallocaop","title":"stream.resource.dealloca
(Stream::ResourceDeallocaOp)","text":"Frees a transient value when available
Syntax:
operation ::= `stream.resource.dealloca` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n $operand `:` type($operand) `{` $operand_size `}`\n `=` `` `>` type($result_timepoint)\n attr-dict\n
Deallocates a transient value (one that is short-lived and local to the current computation) previously allocated using stream.resource.alloca
.
The resource is considered live and valid until the provided timepoint is reached and the memory is only made available for future requests after the result timepoint is reached.
Interfaces: AffinityOpInterface, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_27","title":"Operands:","text":"Operand Description operand
any stream-compatible type operand_size
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_19","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceload-streamresourceloadop","title":"stream.resource.load
(Stream::ResourceLoadOp)","text":"Loads a value from a staging resource
Syntax:
operation ::= `stream.resource.load` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Returns the element(s) at the given offset in the staging resource. The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be transferred.
Interfaces: Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_28","title":"Operands:","text":"Operand Description source
staging resource source_size
index source_offset
index"},{"location":"reference/mlir-dialects/Stream/#results_20","title":"Results:","text":"Result Description result
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcepack-streamresourcepackop","title":"stream.resource.pack
(Stream::ResourcePackOp)","text":"Packs variable-sized slices into a single slab
Syntax:
operation ::= `stream.resource.pack` (`on` `(` $affinity^ `)`)?\n (`offset` `(` $offset^ `)`)?\n `slices` `(` `{`\n custom<PackSliceRanges>($lifetime_intervals,\n $dynamic_slice_sizes,\n type($packed_offsets))\n `}` `)`\n `:` type($total_length)\n attr-dict-with-keyword\n
Performs a greedy packing of one or more sized slices with specified lifetimes and returns their relative offsets in an aliased linear space.
Slices are [start, end] = %slice_byte_size
, where the start and end values define an inclusive lifetime range and the size is the total number of bytes required to be live for that range.
// Computes the total length required for the packed values and the offsets\n// of the 3 slices requested relative to the base of the packed memory:\n%total_length, %offset_0, %offset_1, %offset_2 =\n stream.resource.pack\n // Each slice gets one result offset:\n slices({\n // 3 slices where A and B overlap and will get unique offsets\n // while B and C do not overlap and are allowed to alias.\n [0, 10] = %size_0, // A => %offset_0\n [3, 8] = %size_1, // B => %offset_1\n [9, 10] = %size_2, // C => %offset_2\n ...\n }) : index\n
The lifetime start and end points (inclusive) are only used for relative comparisons and may originate with any meaning (op order in block, epoch, phase of the moon, etc). The packing algorithm uses the intervals to determine slice liveness and when aliasing is safe.
The size of each slice may either be a constant or runtime-computed dynamic value. Constant slices can achieve more dense packing than the dynamic values and CSE/canonicalization should be applied to ensure that as many of the dynamic values are equivalent if possible.
The total length required to pack all slices is returned and can be used to acquire storage. The individual slice offsets are 0-based and as such if are directly used as buffer offsets may need additional offsetting. This can either be applied via the optional offset
operand or slicing of the underlying allocation buffer.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription lifetime_intervals
::mlir::ArrayAttrindex array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_29","title":"Operands:","text":"Operand Description offset
index dynamic_slice_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_21","title":"Results:","text":"Result Description total_length
index packed_offsets
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamresourcesize-streamresourcesizeop","title":"stream.resource.size
(Stream::ResourceSizeOp)","text":"Returns the size of the resource storage in bytes
Syntax:
operation ::= `stream.resource.size` (`on` `(` $affinity^ `)`)?\n $operand\n attr-dict `:` type($operand)\n
Returns a possibly runtime-dynamic byte size of the resource backing storage. This may differ from the logical storage size of a value based on the alignment requirements of the target as well as encoding of higher level values such as sparse tensor formats.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_30","title":"Operands:","text":"Operand Description operand
any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#results_22","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Stream/#streamresourcestore-streamresourcestoreop","title":"stream.resource.store
(Stream::ResourceStoreOp)","text":"Stores a value into a staging resource
Syntax:
operation ::= `stream.resource.store` $value `,`\n $target `[` $target_offset `]` `:`\n type($value)\n `->`\n type($target) `{` $target_size `}`\n attr-dict-with-keyword\n
The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be acquired.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#operands_31","title":"Operands:","text":"Operand Description target
staging resource target_size
index target_offset
index value
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcesubview-streamresourcesubviewop","title":"stream.resource.subview
(Stream::ResourceSubviewOp)","text":"Slices out a cloned subview of a value
Syntax:
operation ::= `stream.resource.subview` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Aliases a byte subrange of a resource.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), StreamableOpInterface, TiedOpInterface, Util_SizeAwareOp, Util_SubrangeOp, ViewLikeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_32","title":"Operands:","text":"Operand Description source
any stream-compatible type source_size
index source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_23","title":"Results:","text":"Result Description result
any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcetry_map-streamresourcetrymapop","title":"stream.resource.try_map
(Stream::ResourceTryMapOp)","text":"Maps read-only memory into a resource
Syntax:
operation ::= `stream.resource.try_map` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `]` `:`\n type($source)\n `->`\n type($did_map) `,` type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Synchronously maps a host heap buffer into a stream-accessible resource with the requested lifetime. If the given source cannot be mapped the did_map
result will be 0 and users must find another route into memory (such as file I/O). The resulting resource is not coherent with the source and behavior is undefined if the underlying contents change.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_33","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_24","title":"Results:","text":"Result Description did_map
1-bit signless integer result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#resource-parameter-io-ops","title":"Resource parameter I/O ops","text":"Resource parameter I/O ops.
"},{"location":"reference/mlir-dialects/Stream/#streamparametergather-streamparametergatherop","title":"stream.parameter.gather
(Stream::ParameterGatherOp)","text":"Gathers multiple resources from a parameter scope
Syntax:
operation ::= `stream.parameter.gather` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `{`\n custom<ParameterGatherOperations>(\n $source_scope, $source_keys, $source_offsets,\n $target, type($target), $target_size, $target_offsets, $target_lengths)\n `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously gathers one or more resources into a single target stream resource. This is equivalent to one stream.parameter.read
per parameter but allows implementations that can batch operations to do so without additional timeline overhead.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_keys
::mlir::ArrayAttrstring array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_34","title":"Operands:","text":"Operand Description source_offsets
variadic of 64-bit signless integer target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offsets
variadic of index target_lengths
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_25","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterload-streamparameterloadop","title":"stream.parameter.load
(Stream::ParameterLoadOp)","text":"Reads a resource from a parameter scope
Syntax:
operation ::= `stream.parameter.load` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n custom<ParameterReference>($source_scope, $source_key)\n `` `[` $source_offset `]` `:`\n type($result) `` `{` $result_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously reads a resource from an external parameter provider and returns the resulting stream resource. Depending on the resource type this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if stream.resource.alloca
and stream.parameter.read
had been used.
Traits: AlwaysSpeculatableImplTrait, Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_key
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_35","title":"Operands:","text":"Operand Description source_offset
64-bit signless integer result_size
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_26","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterread-streamparameterreadop","title":"stream.parameter.read
(Stream::ParameterReadOp)","text":"Reads a resource from a parameter scope
Syntax:
operation ::= `stream.parameter.read` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n custom<ParameterReference>($source_scope, $source_key)\n `` `[` $source_offset `]` `->`\n $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously reads a resource from an external parameter provider into the provided target resource range.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_key
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_36","title":"Operands:","text":"Operand Description source_offset
64-bit signless integer target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_length
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_27","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterscatter-streamparameterscatterop","title":"stream.parameter.scatter
(Stream::ParameterScatterOp)","text":"Scatters multiple resources to a parameter scope
Syntax:
operation ::= `stream.parameter.scatter` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `{`\n custom<ParameterScatterOperations>(\n $source, type($source), $source_size, $source_offsets, $source_lengths,\n $target_scope, $target_keys, $target_offsets)\n `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously scatters one or more resources from a single source resource into one or more parameters. This is equivalent to one stream.parameter.write
per parameter but allows implementations that can batch operations to do so without additional overhead.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription target_scope
::mlir::StringAttrstring attribute target_keys
::mlir::ArrayAttrstring array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_37","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offsets
variadic of index source_lengths
variadic of index target_offsets
variadic of 64-bit signless integer await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_28","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterwrite-streamparameterwriteop","title":"stream.parameter.write
(Stream::ParameterWriteOp)","text":"Writes a resource to a parameter scope
Syntax:
operation ::= `stream.parameter.write` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n $source `[` $source_offset `for` $source_length `]` `:`\n type($source) `` `{` $source_size `}` `->`\n custom<ParameterReference>($target_scope, $target_key)\n `` `[` $target_offset `]`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously writes a resource to an external parameter provider from the provided source resource range.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription target_scope
::mlir::StringAttrstring attribute target_key
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_38","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_length
index target_offset
64-bit signless integer await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_29","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#resource-transfer-ops","title":"Resource transfer ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasyncalloca-streamasyncallocaop","title":"stream.async.alloca
(Stream::AsyncAllocaOp)","text":"Allocates a transient value with undefined contents
Syntax:
operation ::= `stream.async.alloca` (`on` `(` $affinity^ `)`)?\n attr-dict `:` type($result) `{` $storage_size `}`\n
Allocates a transient value (one that is short-lived and local to the current computation) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use discard
access.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: AffinityOpInterface, ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), StreamableOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_39","title":"Operands:","text":"Operand Description storage_size
index"},{"location":"reference/mlir-dialects/Stream/#results_30","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncclone-streamasynccloneop","title":"stream.async.clone
(Stream::AsyncCloneOp)","text":"Clones the contents of a value
Syntax:
operation ::= `stream.async.clone` (`on` `(` $affinity^ `)`)?\n $source `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. Acts as a copy-on-write operation.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_40","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_31","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccollective-streamasynccollectiveop","title":"stream.async.collective
(Stream::AsyncCollectiveOp)","text":"Performs a collective operation
Syntax:
operation ::= `stream.async.collective` `` $op `` `[` $element_count `]`\n (`on` `(` $affinity^ `)`)?\n `channel` `(` $channel `)`\n custom<CollectiveParam>(ref($op), $param) ``\n $source `[` $source_offset `to` $source_end `for` $source_length `]` `,`\n $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n type($source) `` `{` $source_size `}` `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
TODO: document different usage. For now this should be considered a prototype and that modeling of collective operations may change in the future to better ensure in-place operations (where send/recv is a subset of recv/send). We may have dedicated operations for the send and recv verbs as they have sequencing implications - or we could add optional sequencing to this base op.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription op
::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_41","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index target_length
index source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_end
index source_length
index element_count
index channel
a collective communication channel param
32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_32","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncconstant-streamasyncconstantop","title":"stream.async.constant
(Stream::AsyncConstantOp)","text":"Defines a constant resource
Syntax:
operation ::= `stream.async.constant` (`on` `(` $affinity^ `)`)?\n `:`\n type($result) `` `{` $result_size `}`\n `=`\n $value\n attr-dict-with-keyword\n
Returns a new resource with the given constant value.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::Attributeany attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_42","title":"Operands:","text":"Operand Description result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_33","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccopy-streamasynccopyop","title":"stream.async.copy
(Stream::AsyncCopyOp)","text":"Copies a subview of a stream resource to another
Syntax:
operation ::= `stream.async.copy` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `to` $source_end `]` `,`\n $target `[` $target_offset `to` $target_end `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Copies a subview of a resource into a subview of another. As with memcpy this does not support overlapping updates into the same resource. Unlike stream.async.update
copy sources cannot be allocated in-place.
Equivalent to a stream.async.slice + stream.async.update.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_43","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_end
index length
index"},{"location":"reference/mlir-dialects/Stream/#results_34","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncdispatch-streamasyncdispatchop","title":"stream.async.dispatch
(Stream::AsyncDispatchOp)","text":"Dispatches a parallelized grid of work
Syntax:
operation ::= `stream.async.dispatch` (`on` `(` $affinity^ `)`)?\n custom<DispatchEntryPoints>($entry_points)\n (`[` $workload^ `]`)? ``\n custom<DispatchOperands>($resource_operands,\n $resource_operand_offsets,\n $resource_operand_ends,\n $resource_operand_lengths) attr-dict `:`\n custom<ShapedFunctionType>(ref($resource_operands),\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands)\n
Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.
Traits: AttrSizedOperandSegments, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, Stream_StreamableOp, SymbolUserOpInterface, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_points
::mlir::ArrayAttrsymbol ref array attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_44","title":"Operands:","text":"Operand Description workload
variadic of index resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type resource_operand_sizes
variadic of index resource_operand_offsets
variadic of index resource_operand_ends
variadic of index resource_operand_lengths
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_35","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncfill-streamasyncfillop","title":"stream.async.fill
(Stream::AsyncFillOp)","text":"Fills a subview of a stream resource with a value
Syntax:
operation ::= `stream.async.fill` (`on` `(` $affinity^ `)`)?\n $value `,`\n $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n type($value) `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Splats a value into a subview of the given stream resource and returns the resource with the update applied.
Equivalent to a stream.async.splat + stream.async.update.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_45","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index target_length
index value
8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_36","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncload-streamasyncloadop","title":"stream.async.load
(Stream::AsyncLoadOp)","text":"Loads a value from a resource
Syntax:
operation ::= `stream.async.load` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Returns the element at the given location from within the resource.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_46","title":"Operands:","text":"Operand Description source
staging resource source_size
index source_offset
index"},{"location":"reference/mlir-dialects/Stream/#results_37","title":"Results:","text":"Result Description result
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamasyncslice-streamasyncsliceop","title":"stream.async.slice
(Stream::AsyncSliceOp)","text":"Slices out a cloned subview of a value
Syntax:
operation ::= `stream.async.slice` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `to` $source_end `]` `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Slices a subrange of a stream resource based on a byte range. Acts as a copy-on-write operation.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_StreamableOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_47","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_end
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_38","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncsplat-streamasyncsplatop","title":"stream.async.splat
(Stream::AsyncSplatOp)","text":"Splats a value into a resource
Syntax:
operation ::= `stream.async.splat` (`on` `(` $affinity^ `)`)?\n $value `:` type($value) `->` type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a new resource with the given primitive value splatted out to fill the entire contents.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_48","title":"Operands:","text":"Operand Description value
8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_39","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncstore-streamasyncstoreop","title":"stream.async.store
(Stream::AsyncStoreOp)","text":"Stores a value into a resource
Syntax:
operation ::= `stream.async.store` $value `,`\n $target `[` $target_offset `]` `:`\n type($value)\n `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Returns a resource with the element at the given offset set to the given value.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_49","title":"Operands:","text":"Operand Description target
staging resource target_size
index target_offset
index value
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_40","title":"Results:","text":"Result Description result
staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasynctransfer-streamasynctransferop","title":"stream.async.transfer
(Stream::AsyncTransferOp)","text":"Transfers a resource from one location/state to another
Syntax:
operation ::= `stream.async.transfer` (`from` `(` $source_affinity^ `)`)?\n $source `:`\n type($source) `` `{` $source_size `}` `->`\n (`to` `(` $result_affinity^ `)`)?\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Transfers a resource between different states (such as a staging
lifetime to a local
lifetime) or different affinities. This is roughly equivalent to a cast but may have special semantics when later lowered to one or more devices with discrete memory spaces or pools.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, Stream_StreamableOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription source_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity result_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_50","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource or staging resource source_size
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_41","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncupdate-streamasyncupdateop","title":"stream.async.update
(Stream::AsyncUpdateOp)","text":"Updates a slice of a subview of a resource in-place
Syntax:
operation ::= `stream.async.update` (`on` `(` $affinity^ `)`)?\n $update `,`\n $target `[` $target_offset `to` $target_end `]` `:`\n type($update) `` `{` $update_size `}` `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Copies a value into a resource based on a byte range. The returned value is the entire updated target value. Updates can be turned into placement allocations and avoid copies.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_51","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index update
resource or external resource or transient resource or variable resource or constant resource update_size
index"},{"location":"reference/mlir-dialects/Stream/#results_42","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#synchronization-ops","title":"Synchronization ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtimepointawait-streamtimepointawaitop","title":"stream.timepoint.await
(Stream::TimepointAwaitOp)","text":"Awaits a timepoint before returning a set of resources
Syntax:
operation ::= `stream.timepoint.await` (`on` `(` $affinity^ `)`)?\n $await_timepoint `=` `` `>`\n $resource_operands `:`\n custom<SizeAwareTypeList>(type($resource_operands),\n type($results), $resource_operand_sizes)\n attr-dict-with-keyword\n
After asynchronous execution scheduling resources may exist in different states at different points in the execution timeline. This op enables resolving the version of a resource after a particular point in the timeline. As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_TimelineOp, TiedOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_52","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_43","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtimepointbarrier-streamtimepointbarrierop","title":"stream.timepoint.barrier
(Stream::TimepointBarrierOp)","text":"Returns a timepoint indicating when a resource is available
Syntax:
operation ::= `stream.timepoint.barrier` (`on` `(` $affinity^ `)`)?\n $resource `:` type($resource) `` `{` $resource_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
After asynchronous execution scheduling resources may exist in different states at different points in the execution timeline. This op enables identifying when the version of a resource after a particular point in the timeline is available. As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_TimelineOp, TiedOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_53","title":"Operands:","text":"Operand Description resource
resource or external resource or transient resource or variable resource or constant resource or staging resource resource_size
index"},{"location":"reference/mlir-dialects/Stream/#results_44","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource or staging resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointchain_external-streamtimepointchainexternalop","title":"stream.timepoint.chain_external
(Stream::TimepointChainExternalOp)","text":"Exports a timepoint to an external dialect type
Syntax:
operation ::= `stream.timepoint.chain_external` (`on` `(` $affinity^ `)`)?\n $await_timepoint\n `=` `` `>`\n `(` $external_values `:` type($external_values) `)`\n attr-dict-with-keyword\n
Defines a conversion to an external dialect type such as hal.fence
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Interfaces: Stream_AffinityOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_54","title":"Operands:","text":"Operand Description await_timepoint
a timepoint indicating execution availability external_values
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointexport-streamtimepointexportop","title":"stream.timepoint.export
(Stream::TimepointExportOp)","text":"Exports a timepoint to an external dialect type
Syntax:
operation ::= `stream.timepoint.export` (`on` `(` $affinity^ `)`)?\n $await_timepoint\n `=` `` `>`\n `(` type($results) `)`\n attr-dict-with-keyword\n
Defines a conversion to an external dialect type such as hal.fence
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_55","title":"Operands:","text":"Operand Description await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_45","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimmediate-streamtimepointimmediateop","title":"stream.timepoint.immediate
(Stream::TimepointImmediateOp)","text":"Results an immediately-available timepoint
Syntax:
operation ::= `stream.timepoint.immediate` attr-dict\n `=` `` `>` type($result_timepoint)\n
Timepoints indicate a point in the execution timeline and this op can be used to get a placeholder representing the start of the timeline. Any waits on the returned timepoint will resolve immediately. This generally folds away but can be useful if needing to initialize globals or branch args.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_TimelineOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#results_46","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimport-streamtimepointimportop","title":"stream.timepoint.import
(Stream::TimepointImportOp)","text":"Imports a timepoint from an external dialect type
Syntax:
operation ::= `stream.timepoint.import` (`on` `(` $affinity^ `)`)?\n $operands `:` `(` type($operands) `)`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Defines a conversion from an external dialect type such as hal.semaphore
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_56","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#results_47","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointjoin-streamtimepointjoinop","title":"stream.timepoint.join
(Stream::TimepointJoinOp)","text":"Joins one or more timepoints into the max of all of them
Syntax:
operation ::= `stream.timepoint.join` `max` `(` $await_timepoints `)` `=` `` `>` type($result_timepoint)\n attr-dict-with-keyword\n
Returns a timepoint that indicates that all of the input timepoints have been reached.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_TimelineOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_57","title":"Operands:","text":"Operand Description await_timepoints
variadic of a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_48","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtensorclone-streamtensorcloneop","title":"stream.tensor.clone
(Stream::TensorCloneOp)","text":"Clones the contents of a value
Syntax:
operation ::= `stream.tensor.clone` (`on` `(` $affinity^ `)`)?\n $source `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. Acts as a copy-on-write operation.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_StreamableOp, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_48","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_58","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_encoding_dims
variadic of index source_size
index result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_49","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorconstant-streamtensorconstantop","title":"stream.tensor.constant
(Stream::TensorConstantOp)","text":"Defines a constant tensor value
Syntax:
operation ::= `stream.tensor.constant` (`on` `(` $affinity^ `)`)?\n `:`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result)\n `=`\n $value\n attr-dict-with-keyword\n
Returns a typed resource initialized to the given constant value.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, Stream_StreamableOp, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_49","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::Attributeany attribute result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_59","title":"Operands:","text":"Operand Description result_encoding_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_50","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorempty-streamtensoremptyop","title":"stream.tensor.empty
(Stream::TensorEmptyOp)","text":"Defines an empty tensor value
Syntax:
operation ::= `stream.tensor.empty` (`on` `(` $affinity^ `)`)?\n `:`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a typed resource initialized with no contents. This still carries shape metadata and may encode to a non-empty resource such as in cases where the empty representation still has data (e.g. sparse tensors). Subsequent writes must populate any ranges of the tensor that are later read.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_50","title":"Attributes:","text":"AttributeMLIR TypeDescription result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_60","title":"Operands:","text":"Operand Description result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_51","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorfill-streamtensorfillop","title":"stream.tensor.fill
(Stream::TensorFillOp)","text":"Fills a subview of a stream resource with a value
Syntax:
operation ::= `stream.tensor.fill` (`on` `(` $affinity^ `)`)?\n $value `,` $target `[` $start_indices `for` $lengths `]` `:`\n type($value)\n `->`\n $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n `in`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Splats a value into a subview of the given stream resource and returns the resource with the update applied.
Equivalent to a stream.tensor.splat + stream.tensor.update.
Traits: AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_51","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_61","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_encoding_dims
variadic of index target_size
index start_indices
variadic of index lengths
variadic of index value
index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#results_52","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorload-streamtensorloadop","title":"stream.tensor.load
(Stream::TensorLoadOp)","text":"Loads a value from a tensor element
Syntax:
operation ::= `stream.tensor.load` $source (`[` $indices^ `]`)? `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Returns the element at the given location from within the tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_52","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_62","title":"Operands:","text":"Operand Description source
staging resource source_encoding_dims
variadic of index source_size
index indices
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_53","title":"Results:","text":"Result Description result
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamtensorsizeof-streamtensorsizeofop","title":"stream.tensor.sizeof
(Stream::TensorSizeOfOp)","text":"Calculates the storage size of a given high-level type
Syntax:
operation ::= `stream.tensor.sizeof` (`on` `(` $affinity^ `)`)?\n $encoding (`{` $encoding_dims^ `}`)?\n attr-dict `:` type($storage_size)\n
Target-dependent storage size calculation using a high-level annotated type. While within the stream dialect the storage size of a value is left as a placeholder using this op. The requisite target-specific parameters for expanding the size calculation are only available after affinities have been assigned.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_53","title":"Attributes:","text":"AttributeMLIR TypeDescription encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_63","title":"Operands:","text":"Operand Description encoding_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_54","title":"Results:","text":"Result Description storage_size
index"},{"location":"reference/mlir-dialects/Stream/#streamtensorslice-streamtensorsliceop","title":"stream.tensor.slice
(Stream::TensorSliceOp)","text":"Slices out a cloned subview of a value
Syntax:
operation ::= `stream.tensor.slice` (`on` `(` $affinity^ `)`)?\n $source `[` $start_indices `for` $lengths `]` `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Slices a subrange of a stream resource based on a tensor encoding. Acts as a copy-on-write operation.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_StreamableOp, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_54","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_64","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_encoding_dims
variadic of index source_size
index start_indices
variadic of index lengths
variadic of index result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_55","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorsplat-streamtensorsplatop","title":"stream.tensor.splat
(Stream::TensorSplatOp)","text":"Splats a value into a shaped tensor
Syntax:
operation ::= `stream.tensor.splat` (`on` `(` $affinity^ `)`)?\n $value\n `:` type($value)\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a typed resource initialized to the given primitive value.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, StreamableOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_55","title":"Attributes:","text":"AttributeMLIR TypeDescription result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_65","title":"Operands:","text":"Operand Description value
index or integer or floating-point or complex-type result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_56","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorstore-streamtensorstoreop","title":"stream.tensor.store
(Stream::TensorStoreOp)","text":"Stores a value into a tensor element
Syntax:
operation ::= `stream.tensor.store` $value `,`\n $target (`[` $indices^ `]`)? `:`\n type($value)\n `->`\n $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n `in`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Returns a tensor with the element at the given index set to the given value.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_56","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_66","title":"Operands:","text":"Operand Description target
staging resource target_encoding_dims
variadic of index target_size
index indices
variadic of index value
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_57","title":"Results:","text":"Result Description result
staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtensortrace-streamtensortraceop","title":"stream.tensor.trace
(Stream::TensorTraceOp)","text":"Traces one or more tensor values at runtime
Syntax:
operation ::= `stream.tensor.trace` $key `=` `[`\n custom<EncodedResourceOperands>(\n $resources, type($resources), $resource_sizes,\n $resource_encodings, $resource_encoding_dims)\n `]` attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.
Traits: AttrSizedOperandSegments
Interfaces: ShapeAwareOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_57","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute resource_encodings
::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_67","title":"Operands:","text":"Operand Description resources
variadic of staging resource resource_sizes
variadic of index resource_encoding_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamtensorupdate-streamtensorupdateop","title":"stream.tensor.update
(Stream::TensorUpdateOp)","text":"Updates a slice of a subview of a resource in-place
Syntax:
operation ::= `stream.tensor.update` (`on` `(` $affinity^ `)`)?\n $update `,` $target `[` $start_indices `]` `:`\n $update_encoding (`` `{` $update_encoding_dims^ `}`)?\n `in`\n type($update) `` `{` $update_size `}`\n `->`\n $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n `in`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Copies a value into a resource based on tensor encodings. The returned value is the entire updated target value.
Traits: AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_58","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute update_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_68","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_encoding_dims
variadic of index target_size
index start_indices
variadic of index update
resource or external resource or transient resource or variable resource or constant resource update_encoding_dims
variadic of index update_size
index"},{"location":"reference/mlir-dialects/Stream/#results_58","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/Stream/#collectiveattr","title":"CollectiveAttr","text":"collective operation and specification
Syntax:
#stream.collective<\n CollectiveKind, # kind\n std::optional<CollectiveReductionOp>, # reduction\n CollectiveElementType # element_type\n>\n
Specifies the collective operation to perform and any mode bits required.
"},{"location":"reference/mlir-dialects/Stream/#parameters","title":"Parameters:","text":"Parameter C++ type Description kind CollectiveKind
reduction std::optional<CollectiveReductionOp>
element_type CollectiveElementType
"},{"location":"reference/mlir-dialects/Stream/#namedparameterattr","title":"NamedParameterAttr","text":"named parameter referenced by an optional scope and key
Syntax:
#stream.parameter.named<\n ::mlir::Type, # type\n StringAttr, # scope\n StringAttr, # key\n DictionaryAttr # config\n>\n
Specifies an externally-defined parameter that can be referenced by an optional scope defining a set of parameters and a key uniquely identifying the parameter within its scope.
"},{"location":"reference/mlir-dialects/Stream/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description type ::mlir::Type
scope StringAttr
key StringAttr
config DictionaryAttr
"},{"location":"reference/mlir-dialects/Stream/#partitioningconfigattr","title":"PartitioningConfigAttr","text":"defines partitioning configuration
Configures the partitioning algorithm to use and its configuration. Partitioning is useful to adjust when scheduling behavior of targets is radically different - such as single-threaded vs. multi-threaded CPUs or bespoke ML accelerators vs. general purpose GPUs. This mechanism controls the amount of concurrency, parallelism, memory consumption, and latency.
"},{"location":"reference/mlir-dialects/Stream/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description favor IREE::Stream::FavorAttr
"},{"location":"reference/mlir-dialects/Stream/#resourceconfigattr","title":"ResourceConfigAttr","text":"defines resource constraints configuration
Defines resource storage constraints. These allow for packing and layout algorithms to ensure they are producing usable results on target devices.
"},{"location":"reference/mlir-dialects/Stream/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description maxAllocationSize int64_t
minBufferOffsetAlignment int64_t
maxBufferRange int64_t
minBufferRangeAlignment int64_t
indexBits int64_t
aliasMutableBindings bool
memoryModel IREE::Stream::MemoryModel
"},{"location":"reference/mlir-dialects/Stream/#timepointattr","title":"TimepointAttr","text":"an immediately-resolved timepoint
"},{"location":"reference/mlir-dialects/Stream/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description type ::mlir::Type
"},{"location":"reference/mlir-dialects/Stream/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/Stream/#constant-resource","title":"constant resource","text":"Stream constants are immutable values that are available for the lifetime of the program once initialized.
"},{"location":"reference/mlir-dialects/Stream/#external-resource","title":"external resource","text":"Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#staging-resource","title":"staging resource","text":"Stream upload/download staging resource. These are used outside of streams and then transferred to other stream resources such as variables or transients for use inside of streams. Dispatches and several other operations cannot directly operate on these resources.
"},{"location":"reference/mlir-dialects/Stream/#transient-resource","title":"transient resource","text":"Stream transients represent asynchronously-available and sequenced values that have a short lifetime - often only passed between stream executions. It is expected that transient values are not stored in global state and have minimal lifetime as they may be heavily pooled or suballocated.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#resource","title":"resource","text":"A stream resource that has not yet had its lifetime calculated.
"},{"location":"reference/mlir-dialects/Stream/#variable-resource","title":"variable resource","text":"Stream variables represent asynchronously-available and sequenced values that have a long lifetime relative to the work being performed on them. These variables are often stored in global state and may live for the entire duration of the program.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/Stream/#bindingtype","title":"BindingType","text":"a managed resource binding into an executable scope
Syntax: !stream.binding
A resource binding available within an executable dispatch function. The bindings map 1:1 with the resources bound during dispatch operations.
"},{"location":"reference/mlir-dialects/Stream/#channeltype","title":"ChannelType","text":"a collective communication channel
Syntax: !stream.channel
Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.
In programs that model SPMD behavior internally channels can be created or provided by hosting applications. For example, the program could expose a @set_channels(!util.list<!stream.channel>)
method that stores the channels in globals for use throughout the program allowing for application-controlled channel configuration.
"},{"location":"reference/mlir-dialects/Stream/#filetype","title":"FileType","text":"a file handle used for I/O operations
Syntax: !stream.file
A file handle that can be asynchronously read and written into/from stream resources.
"},{"location":"reference/mlir-dialects/Stream/#resourcetype","title":"ResourceType","text":"a managed resource
Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description lifetime IREE::Stream::Lifetime
"},{"location":"reference/mlir-dialects/Stream/#timepointtype","title":"TimepointType","text":"a timepoint indicating execution availability
Syntax: !stream.timepoint
Represents a point in the execution timeline that when resolved indicates that all of the execution prior to this timepoint has completed and the results of the execution are available for use. This includes transitive dependencies as well; if timepoint B is dependent on timepoint A then when B is available so too must be A.
"},{"location":"reference/mlir-dialects/Util/","title":"Util","text":""},{"location":"reference/mlir-dialects/Util/#util-dialect","title":"'util' Dialect","text":"A dialect used for types common across IREE subdialects.
- 'util' Dialect
- Operation definition
- Address/offset arithmetic ops
- util.align (Util::AlignOp)
- util.sizeof (Util::SizeOfOp)
- Buffer ops
- util.buffer.alloc (Util::BufferAllocOp)
- util.buffer.compare (Util::BufferCompareOp)
- util.buffer.constant (Util::BufferConstantOp)
- util.buffer.copy (Util::BufferCopyOp)
- util.buffer.dealloc (Util::BufferDeallocOp)
- util.buffer.fill (Util::BufferFillOp)
- util.buffer.load (Util::BufferLoadOp)
- util.buffer.size (Util::BufferSizeOp)
- util.buffer.slice (Util::BufferSliceOp)
- util.buffer.storage (Util::BufferStorageOp)
- util.buffer.store (Util::BufferStoreOp)
- util.buffer.subspan (Util::BufferSubspanOp)
- Compiler hint ops
- util.optimization_barrier (Util::OptimizationBarrierOp)
- util.unfoldable_constant (Util::UnfoldableConstantOp)
- util.unreachable (Util::UnreachableOp)
- Data type conversion ops
- util.numeric.optional_narrow (Util::NumericOptionalNarrowOp)
- Global ops
- util.global.address (Util::GlobalAddressOp)
- util.global.load.indirect (Util::GlobalLoadIndirectOp)
- util.global.load (Util::GlobalLoadOp)
- util.global (Util::GlobalOp)
- util.global.store.indirect (Util::GlobalStoreIndirectOp)
- util.global.store (Util::GlobalStoreOp)
- List ops
- util.list.create (Util::ListCreateOp)
- util.list.get (Util::ListGetOp)
- util.list.resize (Util::ListResizeOp)
- util.list.set (Util::ListSetOp)
- util.list.size (Util::ListSizeOp)
- Range arithmetic ops
- util.range.extents (Util::RangeExtentsOp)
- util.range.max (Util::RangeMaxOp)
- util.range.min (Util::RangeMinOp)
- Status ops
- util.status.check_ok (Util::StatusCheckOkOp)
- Structural ops
- util.initializer (Util::InitializerOp)
- util.initializer.return (Util::InitializerReturnOp)
- Type manipulation ops
- util.cast (Util::CastOp)
- util.cmp.eq (Util::CmpEQOp)
- util.null (Util::NullOp)
- Value utility ops
- util.switch (Util::SwitchOp)
- Type definition
- BufferType
- ListType
- ObjectType
- PtrType
- VariantType
"},{"location":"reference/mlir-dialects/Util/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Util/#addressoffset-arithmetic-ops","title":"Address/offset arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilalign-utilalignop","title":"util.align
(Util::AlignOp)","text":"Aligns up to a power-of-two alignment if required
Syntax:
operation ::= `util.align` $value `,` $alignment attr-dict `:` type($result)\n
Aligns |value| up to the given power-of-two |alignment| if required.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands","title":"Operands:","text":"Operand Description value
signless-integer-like alignment
signless-integer-like"},{"location":"reference/mlir-dialects/Util/#results","title":"Results:","text":"Result Description result
signless-integer-like"},{"location":"reference/mlir-dialects/Util/#utilsizeof-utilsizeofop","title":"util.sizeof
(Util::SizeOfOp)","text":"Returns the size in bytes of a datatype
Syntax:
operation ::= `util.sizeof` $sizedType attr-dict-with-keyword\n
Most datatypes have a static size at all layers of the compilation stack. However, those that only have a size for certain lowering flows can be challenging. This op represents such sizes in a way that can be specialized later.
Returns the size in bytes, rounded up to the next whole byte of the specified type. This op will fold to a constant index value for IntegerType and FloatType. All others are not folded.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription sizedType
::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Util/#results_1","title":"Results:","text":"Result Description size
index"},{"location":"reference/mlir-dialects/Util/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/Util/#utilbufferalloc-utilbufferallocop","title":"util.buffer.alloc
(Util::BufferAllocOp)","text":"Allocates a buffer with undefined contents
Syntax:
operation ::= `util.buffer.alloc` `uninitialized`\n attr-dict\n `:`\n type($result) `` `{` $storage_size `}`\n
Allocates a buffer with undefined contents. Consumers of the allocated result must assume nothing of the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription alignment
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_1","title":"Operands:","text":"Operand Description storage_size
index"},{"location":"reference/mlir-dialects/Util/#results_2","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercompare-utilbuffercompareop","title":"util.buffer.compare
(Util::BufferCompareOp)","text":"Compares a range of two buffers
Syntax:
operation ::= `util.buffer.compare` $lhs `[` $lhs_offset `]` `,`\n $rhs `[` $rhs_offset `]` `,`\n $length `:`\n type($lhs) `` `{` $lhs_size `}` `,`\n type($rhs) `` `{` $rhs_size `}`\n attr-dict-with-keyword\n
Returns true if the two ranges are bitwise equivalent, somewhat like memcmp.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_2","title":"Operands:","text":"Operand Description lhs
a reference counted byte buffer lhs_size
index lhs_offset
index rhs
a reference counted byte buffer rhs_size
index rhs_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#results_3","title":"Results:","text":"Result Description result
1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilbufferconstant-utilbufferconstantop","title":"util.buffer.constant
(Util::BufferConstantOp)","text":"Constant host-side byte buffer
Syntax:
operation ::= `util.buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n
Defines a compile-time byte buffer based on the given attribute value. The attribute will be serialized into the canonical IREE format for the chosen host target.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription name
::mlir::StringAttrstring attribute value
::mlir::Attributebuffer-like constant attribute values alignment
::mlir::IntegerAttrindex attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#results_4","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercopy-utilbuffercopyop","title":"util.buffer.copy
(Util::BufferCopyOp)","text":"Copies a range of bytes between buffers
Syntax:
operation ::= `util.buffer.copy` $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Copies a range of bytes as with memcpy (no overlapping).
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_3","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index target
a reference counted byte buffer target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#utilbufferdealloc-utilbufferdeallocop","title":"util.buffer.dealloc
(Util::BufferDeallocOp)","text":"Deallocates a buffer
Syntax:
operation ::= `util.buffer.dealloc` $operand `:` type($operand) `{` $operand_size `}`\n attr-dict-with-keyword\n
Hints that the buffer contents can be discarded. Buffers are reference counted and other owners may keep it live beyond the dealloc.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_4","title":"Operands:","text":"Operand Description operand
a reference counted byte buffer operand_size
index"},{"location":"reference/mlir-dialects/Util/#utilbufferfill-utilbufferfillop","title":"util.buffer.fill
(Util::BufferFillOp)","text":"Fills a range of bytes with a value
Syntax:
operation ::= `util.buffer.fill` $pattern `,`\n $target `[` $target_offset `for` $length `]` `:`\n type($pattern) `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Fills the contents of the buffer in the given byte range with a pattern. The offset and length must match the natural alignment of the pattern type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_5","title":"Operands:","text":"Operand Description pattern
integer or floating-point or index target
a reference counted byte buffer target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#utilbufferload-utilbufferloadop","title":"util.buffer.load
(Util::BufferLoadOp)","text":"Loads a value from a buffer
Syntax:
operation ::= `util.buffer.load` $source `[` $source_offset `for` $length `]`\n `:` type($source) `` `{` $source_size `}` `->` type($result)\n attr-dict-with-keyword\n
Loads a value at a byte offset. Must be aligned to the natural size of the result type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_6","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#results_5","title":"Results:","text":"Result Description result
index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#utilbuffersize-utilbuffersizeop","title":"util.buffer.size
(Util::BufferSizeOp)","text":"Returns the total buffer storage size in bytes
Syntax:
operation ::= `util.buffer.size` $operand\n `:` type($operand)\n attr-dict-with-keyword\n
Returns the total length of the buffer in bytes from its base offset.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_7","title":"Operands:","text":"Operand Description operand
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#results_6","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Util/#utilbufferslice-utilbuffersliceop","title":"util.buffer.slice
(Util::BufferSliceOp)","text":"Clones a subregion of a buffer
Syntax:
operation ::= `util.buffer.slice` $source `[` $source_offset `]` attr-dict `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n
Returns a copy of the contents from the source buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription alignment
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_8","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Util/#results_7","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbufferstorage-utilbufferstorageop","title":"util.buffer.storage
(Util::BufferStorageOp)","text":"Returns the underlying buffer storage range
Syntax:
operation ::= `util.buffer.storage` $operand\n `:` type($operand) `` `{` $operand_size `}` `->` `(` type($result) `,` type($offset) `)`\n attr-dict-with-keyword\n
Returns the buffer storage as a memref that must be offset and restricted to the returned range. The memref may be of any type and the user is responsible for ensuring that the reinterpret_cast-like behavior makes sense for the data they are accessing.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_9","title":"Operands:","text":"Operand Description operand
a reference counted byte buffer operand_size
index"},{"location":"reference/mlir-dialects/Util/#results_8","title":"Results:","text":"Result Description result
memref of any type values offset
index"},{"location":"reference/mlir-dialects/Util/#utilbufferstore-utilbufferstoreop","title":"util.buffer.store
(Util::BufferStoreOp)","text":"Stores a value into a buffer
Syntax:
operation ::= `util.buffer.store` $source `,`\n $target `[` $target_offset `for` $length `]`\n `:` type($source) `->` type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Stores a value at a byte offset. Must be aligned to the natural size of the source type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_10","title":"Operands:","text":"Operand Description source
index or integer or floating-point target
a reference counted byte buffer target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#utilbuffersubspan-utilbuffersubspanop","title":"util.buffer.subspan
(Util::BufferSubspanOp)","text":"Returns a reference to a subrange of a buffer
Syntax:
operation ::= `util.buffer.subspan` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a logical view into an underlying source buffer. This induces aliasing and multiple SSA values may allow access to the same underlying buffer storage.
Subspans are a compiler-only concept and are propagated by an analysis pass to result in absolute offsets on accesses any place the subrange would have been used.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SubrangeOperandOpInterface, TiedOpInterface, Util_SizeAwareOp, Util_SubrangeOp, ViewLikeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_11","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Util/#results_9","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/Util/#utiloptimization_barrier-utiloptimizationbarrierop","title":"util.optimization_barrier
(Util::OptimizationBarrierOp)","text":"Prevents compiler optimizations across a value.
Syntax:
operation ::= `util.optimization_barrier` attr-dict\n ($operands^ `:` type($operands))?\n
Wraps any operands in an unoptimizable identity to prevent its results from being folded. It will be dropped during the final step in compilation and has no effect at runtime.
Traits: SameOperandsAndResultType
"},{"location":"reference/mlir-dialects/Util/#operands_12","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Util/#results_10","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Util/#utilunfoldable_constant-utilunfoldableconstantop","title":"util.unfoldable_constant
(Util::UnfoldableConstantOp)","text":"A constant that cannot be folded by the compiler.
Similar to a std.constant, but is declared as having a side effect and has no folder. This is really just syntactic sugar as it is canonicalized to a std.constant wrapped in an util.optimization_barrier.
"},{"location":"reference/mlir-dialects/Util/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/Util/#results_11","title":"Results:","text":"Result Description \u00abunnamed\u00bb any type"},{"location":"reference/mlir-dialects/Util/#utilunreachable-utilunreachableop","title":"util.unreachable
(Util::UnreachableOp)","text":"Unreachable assertion op
Syntax:
operation ::= `util.unreachable` $message attr-dict\n
Signals to the compiler that the parent block should not be reachable. This may be converted into a runtime assertion, though ideally they are stripped during translation.
^bb0:\n %true = arith.constant true\n cond_br %true, ^bb2, ^bb1\n^bb1:\n // Indicates that this branch should never be taken.\n util.unreachable \"shouldn't be here\"\n^bb2:\n ...\n
Traits: ReturnLike, Terminator
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#data-type-conversion-ops","title":"Data type conversion ops","text":""},{"location":"reference/mlir-dialects/Util/#utilnumericoptional_narrow-utilnumericoptionalnarrowop","title":"util.numeric.optional_narrow
(Util::NumericOptionalNarrowOp)","text":"Memorializes an optional numeric narrowing that is valid
Syntax:
operation ::= `util.numeric.optional_narrow` $operand `:` type($operand) `as` $semantic_type attr-dict\n
Serves as a placeholder for points in the computation where an optional numeric narrowing can be performed without loss of information. Such ops can guide optimization passes wishing to perform precision reduction.
In addition to the operand and result type, this op takes an additional semantic_type
attribute representing the semantic target type which can be: * FloatType * Signed IntegerType * Unsigned IntegerType
Note that this semantic_type
must be a sign-carrying integer if using an integer type and cannot be IndexType (i.e. it can be used to indicate a possible narrowing of an IndexType to a specific integer).
If the operand is a TensorType, then the result must be a TensorType. The semantic_type
constrains the element type.
Optionally, the minimum and maximum integer values (for integer semantic types) are tracked if known.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription semantic_type
::mlir::TypeAttrany type attribute min_value
::mlir::IntegerAttrarbitrary integer attribute max_value
::mlir::IntegerAttrarbitrary integer attribute"},{"location":"reference/mlir-dialects/Util/#operands_13","title":"Operands:","text":"Operand Description operand
signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#results_12","title":"Results:","text":"Result Description result
signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/Util/#utilglobaladdress-utilglobaladdressop","title":"util.global.address
(Util::GlobalAddressOp)","text":"Returns an address reference to a global
Syntax:
operation ::= `util.global.address` $global attr-dict `:` qualified(type($result))\n
Returns the address of a global as a typed reference. Can be used with the global load and store indirect ops.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalAddressOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription global
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#results_13","title":"Results:","text":"Result Description result
a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilgloballoadindirect-utilgloballoadindirectop","title":"util.global.load.indirect
(Util::GlobalLoadIndirectOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `util.global.load.indirect` $global attr-dict `:` qualified(type($global)) `->` type($result)\n
Returns a copy of the global variable value.
Interfaces: Util_GlobalLoadIndirectOpInterface
"},{"location":"reference/mlir-dialects/Util/#operands_14","title":"Operands:","text":"Operand Description global
a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#results_14","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utilglobalload-utilgloballoadop","title":"util.global.load
(Util::GlobalLoadOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `util.global.load` $global attr-dict `:` type($result)\n
Returns a global variable value.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription global
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#results_15","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utilglobal-utilglobalop","title":"util.global
(Util::GlobalOp)","text":"Stateful global variable declaration
Syntax:
operation ::= `util.global` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Declares a global variable that maintains its value across invocations. The value is tied to the execution context of the module and different contexts will have different variable storage.
Interfaces: Symbol, Util_GlobalOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/Util/#utilglobalstoreindirect-utilglobalstoreindirectop","title":"util.global.store.indirect
(Util::GlobalStoreIndirectOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `util.global.store.indirect` $value `,` $global attr-dict `:` type($value) `->` qualified(type($global))\n
Stores a copy of the value into a global variable.
Interfaces: Util_GlobalStoreIndirectOpInterface
"},{"location":"reference/mlir-dialects/Util/#operands_15","title":"Operands:","text":"Operand Description value
any type global
a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilglobalstore-utilglobalstoreop","title":"util.global.store
(Util::GlobalStoreOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `util.global.store` $value `,` $global attr-dict `:` type($value)\n
Stores a copy of the value into a global variable.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription global
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#operands_16","title":"Operands:","text":"Operand Description value
any type"},{"location":"reference/mlir-dialects/Util/#list-ops","title":"List ops","text":"Ops for !util.list<T>
(mostly just a placeholder for now).
"},{"location":"reference/mlir-dialects/Util/#utillistcreate-utillistcreateop","title":"util.list.create
(Util::ListCreateOp)","text":"Creates a new empty list
Syntax:
operation ::= `util.list.create` ($initial_capacity^)? attr-dict `:` qualified(type($result))\n
Creates a new empty list with an optional initial capacity.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_17","title":"Operands:","text":"Operand Description initial_capacity
index"},{"location":"reference/mlir-dialects/Util/#results_16","title":"Results:","text":"Result Description result
dense list container type"},{"location":"reference/mlir-dialects/Util/#utillistget-utillistgetop","title":"util.list.get
(Util::ListGetOp)","text":"Element accessor
Syntax:
operation ::= `util.list.get` $list `[` $index `]` attr-dict `:` custom<ListTypeGet>(type($list), type($result))\n
Returns the value of the element at the given index. Note that the value may be null if the element is null or the type does not match.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_18","title":"Operands:","text":"Operand Description list
dense list container type index
index"},{"location":"reference/mlir-dialects/Util/#results_17","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utillistresize-utillistresizeop","title":"util.list.resize
(Util::ListResizeOp)","text":"Resizes the list to a new count in elements
Syntax:
operation ::= `util.list.resize` operands attr-dict `:` qualified(type($list))\n
Resizes the list to contain new_size
elements. This will either truncate the list if the existing size is greater than new_size
or extend the list with the default list value of the element type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_19","title":"Operands:","text":"Operand Description list
dense list container type new_size
index"},{"location":"reference/mlir-dialects/Util/#utillistset-utillistsetop","title":"util.list.set
(Util::ListSetOp)","text":"Element mutator
Syntax:
operation ::= `util.list.set` $list `[` $index `]` `,` $value attr-dict `:` custom<ListTypeSet>(type($list), type($value))\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_20","title":"Operands:","text":"Operand Description list
dense list container type index
index value
any type"},{"location":"reference/mlir-dialects/Util/#utillistsize-utillistsizeop","title":"util.list.size
(Util::ListSizeOp)","text":"The size of the list in elements
Syntax:
operation ::= `util.list.size` operands attr-dict `:` qualified(type($list))\n
Returns the current size of the list in elements.
Interfaces: InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_21","title":"Operands:","text":"Operand Description list
dense list container type"},{"location":"reference/mlir-dialects/Util/#results_18","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Util/#range-arithmetic-ops","title":"Range arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilrangeextents-utilrangeextentsop","title":"util.range.extents
(Util::RangeExtentsOp)","text":"Returns the min/max of a union of a set of ranges
Syntax:
operation ::= `util.range.extents` custom<RangeList>($offsets, $lengths) attr-dict `:` type($min)\n
Computes min(offsets) and max(offsets + lengths). Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType, SameVariadicOperandSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_22","title":"Operands:","text":"Operand Description offsets
variadic of index or integer lengths
variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_19","title":"Results:","text":"Result Description min
index or integer max
index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemax-utilrangemaxop","title":"util.range.max
(Util::RangeMaxOp)","text":"Returns the max of all values
Syntax:
operation ::= `util.range.max` $operands attr-dict `:` type($result)\n
Computes the max of a variadic list of operands. Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType, SameVariadicOperandSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_23","title":"Operands:","text":"Operand Description operands
variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_20","title":"Results:","text":"Result Description result
index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemin-utilrangeminop","title":"util.range.min
(Util::RangeMinOp)","text":"Returns the min of all values
Syntax:
operation ::= `util.range.min` $operands attr-dict `:` type($result)\n
Computes the min of a variadic list of operands. Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType, SameVariadicOperandSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_24","title":"Operands:","text":"Operand Description operands
variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_21","title":"Results:","text":"Result Description result
index or integer"},{"location":"reference/mlir-dialects/Util/#status-ops","title":"Status ops","text":""},{"location":"reference/mlir-dialects/Util/#utilstatuscheck_ok-utilstatuscheckokop","title":"util.status.check_ok
(Util::StatusCheckOkOp)","text":"Raises a global failure if a status is not 'ok'
Syntax:
operation ::= `util.status.check_ok` $status (`,` $message^)? attr-dict\n
When the status is not 'ok' this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. It's also possible that other work that is in-flight or pending when the failure occurs will complete.
"},{"location":"reference/mlir-dialects/Util/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#operands_25","title":"Operands:","text":"Operand Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/Util/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/Util/#utilinitializer-utilinitializerop","title":"util.initializer
(Util::InitializerOp)","text":"Global initialization function
A function that is called in definition order upon module initialization. Must not load any globals that are defined or initialized after it in the module.
Traits: IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, Util_InitializerOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Util/#utilinitializerreturn-utilinitializerreturnop","title":"util.initializer.return
(Util::InitializerReturnOp)","text":"Return from a util.initializer
Syntax:
operation ::= `util.initializer.return` attr-dict\n
Returns control from an initializer function.
Traits: AlwaysSpeculatableImplTrait, HasParent, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#type-manipulation-ops","title":"Type manipulation ops","text":""},{"location":"reference/mlir-dialects/Util/#utilcast-utilcastop","title":"util.cast
(Util::CastOp)","text":"Casts one util type to another ala static_cast/dynamic_cast
Syntax:
operation ::= `util.cast` $operand attr-dict `:` type($operand) `to` type($result)\n
Performs a type cast between object types known to the util dialect.
Traits: AlwaysSpeculatableImplTrait
Interfaces: CastOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_26","title":"Operands:","text":"Operand Description operand
any type"},{"location":"reference/mlir-dialects/Util/#results_22","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utilcmpeq-utilcmpeqop","title":"util.cmp.eq
(Util::CmpEQOp)","text":"Compares two values for equality
Syntax:
operation ::= `util.cmp.eq` operands attr-dict `:` type($lhs)\n
Compares two operands for equality. This is intended for comparing IREE reference types (like !util.buffer) that cannot be used with std.cmpi.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_27","title":"Operands:","text":"Operand Description lhs
any type rhs
any type"},{"location":"reference/mlir-dialects/Util/#results_23","title":"Results:","text":"Result Description result
1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilnull-utilnullop","title":"util.null
(Util::NullOp)","text":"Returns a null type value
Syntax:
operation ::= `util.null` attr-dict `:` type($result)\n
Defines an SSA value that is lowered into dialects supporting null/undefined/optional/etc values.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#results_24","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#value-utility-ops","title":"Value utility ops","text":""},{"location":"reference/mlir-dialects/Util/#utilswitch-utilswitchop","title":"util.switch
(Util::SwitchOp)","text":"Primitive switch operation
Syntax:
operation ::= `util.switch` type($default_value) `from`\n custom<TypedValueList>(ref(type($default_value)), $values, type($values))\n `at` $index\n `else` $default_value\n attr-dict\n `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = util.switch %index[%c100, %c200, %c300] else %c5 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_28","title":"Operands:","text":"Operand Description index
index default_value
index or integer or floating-point values
variadic of index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#results_25","title":"Results:","text":"Result Description result
index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/Util/#buffertype","title":"BufferType","text":"a reference counted byte buffer
Syntax: !util.buffer
A reference counted byte buffer that models a pointer, offset, and length.
"},{"location":"reference/mlir-dialects/Util/#listtype","title":"ListType","text":"dense list container type
Syntax:
!util.list<\n Type # element_type\n>\n
Typed container supporting variant storage.
"},{"location":"reference/mlir-dialects/Util/#parameters","title":"Parameters:","text":"Parameter C++ type Description element_type Type
"},{"location":"reference/mlir-dialects/Util/#objecttype","title":"ObjectType","text":"a placeholder for an unspecified object type
Syntax: !util.object
Describes a runtime object type. These may be reference counted or garbage collected at runtime.
"},{"location":"reference/mlir-dialects/Util/#ptrtype","title":"PtrType","text":"a pointer-like reference
Syntax:
!util.ptr<\n Type # target_type\n>\n
A typed indirect reference to a value. These define a runtime addressable value that is strongly referenced.
"},{"location":"reference/mlir-dialects/Util/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description target_type Type
"},{"location":"reference/mlir-dialects/Util/#varianttype","title":"VariantType","text":"a placeholder for a variant type (?
)
Syntax: !util.variant
Describes a runtime variant type. These may be primitives (i32, f32, etc) or object types.
"},{"location":"reference/mlir-dialects/VM/","title":"VM","text":""},{"location":"reference/mlir-dialects/VM/#vm-dialect","title":"'vm' Dialect","text":"A dialect representing operations against an abstract virtual machine.
The virtual machine ops are designed to be either serialized to a bytecode representation that can be interpreted at runtime or lowered further to static representations such as LLVM IR, C, etc. The idea is that the types and operations performed are generally just encoding resource ownership rules and control flow that can be represented in many different ways by target runtimes. For example, it should be possible to lower the VM dialect to SPIR-V and run the VM entirely within a persistent Vulkan kernel.
With this scalable runtime approach we make some limiting assumptions to keep the required implementations simple. As we assume all real math is happening within dispatch regions the only math we provide is scalar operations used for offset and shape calculations. This also enables simple flow control such as fixed-range loops.
Besides integer values the only other storage type is a variant reference modeling an abstract iree_vm_ref_t. This allows automated reference counting to be relied upon by other dialects built on top of the VM dialect and avoids the need for more verbose manual reference counting logic (that may be difficult or impossible to manage given the coroutine-like nature of the VM). Lowering targets can insert the reference counting as needed.
The types in the VM dialect correspond to the storage rather than value type, with the interpretation of the type encoded on the op.
- 'vm' Dialect
- Operation definition
- Async/fiber ops
- vm.yield (VM::YieldOp)
- Bitwise shift and rotate ops
- vm.shl.i32 (VM::ShlI32Op)
- vm.shl.i64 (VM::ShlI64Op)
- vm.shr.i32.s (VM::ShrI32SOp)
- vm.shr.i32.u (VM::ShrI32UOp)
- vm.shr.i64.s (VM::ShrI64SOp)
- vm.shr.i64.u (VM::ShrI64UOp)
- Buffer ops
- vm.buffer.alloc (VM::BufferAllocOp)
- vm.buffer.clone (VM::BufferCloneOp)
- vm.buffer.compare (VM::BufferCompareOp)
- vm.buffer.copy (VM::BufferCopyOp)
- vm.buffer.fill.f32 (VM::BufferFillF32Op)
- vm.buffer.fill.f64 (VM::BufferFillF64Op)
- vm.buffer.fill.i16 (VM::BufferFillI16Op)
- vm.buffer.fill.i32 (VM::BufferFillI32Op)
- vm.buffer.fill.i64 (VM::BufferFillI64Op)
- vm.buffer.fill.i8 (VM::BufferFillI8Op)
- vm.buffer.length (VM::BufferLengthOp)
- vm.buffer.load.f32 (VM::BufferLoadF32Op)
- vm.buffer.load.f64 (VM::BufferLoadF64Op)
- vm.buffer.load.i16.s (VM::BufferLoadI16SOp)
- vm.buffer.load.i16.u (VM::BufferLoadI16UOp)
- vm.buffer.load.i32 (VM::BufferLoadI32Op)
- vm.buffer.load.i64 (VM::BufferLoadI64Op)
- vm.buffer.load.i8.s (VM::BufferLoadI8SOp)
- vm.buffer.load.i8.u (VM::BufferLoadI8UOp)
- vm.buffer.store.f32 (VM::BufferStoreF32Op)
- vm.buffer.store.f64 (VM::BufferStoreF64Op)
- vm.buffer.store.i16 (VM::BufferStoreI16Op)
- vm.buffer.store.i32 (VM::BufferStoreI32Op)
- vm.buffer.store.i64 (VM::BufferStoreI64Op)
- vm.buffer.store.i8 (VM::BufferStoreI8Op)
- Casting and conversion ops
- vm.bitcast.f32.i32 (VM::BitcastF32I32Op)
- vm.bitcast.f64.i64 (VM::BitcastF64I64Op)
- vm.bitcast.i32.f32 (VM::BitcastI32F32Op)
- vm.bitcast.i64.f64 (VM::BitcastI64F64Op)
- vm.cast.any.ref (VM::CastAnyRefOp)
- vm.cast.f32.si32 (VM::CastF32SI32Op)
- vm.cast.f32.ui32 (VM::CastF32UI32Op)
- vm.cast.ref.any (VM::CastRefAnyOp)
- vm.cast.si32.f32 (VM::CastSI32F32Op)
- vm.cast.ui32.f32 (VM::CastUI32F32Op)
- vm.ext.f32.f64 (VM::ExtF32F64Op)
- vm.ext.i16.i32.s (VM::ExtI16I32SOp)
- vm.ext.i16.i32.u (VM::ExtI16I32UOp)
- vm.ext.i16.i64.s (VM::ExtI16I64SOp)
- vm.ext.i16.i64.u (VM::ExtI16I64UOp)
- vm.ext.i32.i64.s (VM::ExtI32I64SOp)
- vm.ext.i32.i64.u (VM::ExtI32I64UOp)
- vm.ext.i8.i32.s (VM::ExtI8I32SOp)
- vm.ext.i8.i32.u (VM::ExtI8I32UOp)
- vm.ext.i8.i64.s (VM::ExtI8I64SOp)
- vm.ext.i8.i64.u (VM::ExtI8I64UOp)
- vm.trunc.f64.f32 (VM::TruncF64F32Op)
- vm.trunc.i16.i8 (VM::TruncI16I8Op)
- vm.trunc.i32.i16 (VM::TruncI32I16Op)
- vm.trunc.i32.i8 (VM::TruncI32I8Op)
- vm.trunc.i64.i16 (VM::TruncI64I16Op)
- vm.trunc.i64.i32 (VM::TruncI64I32Op)
- vm.trunc.i64.i8 (VM::TruncI64I8Op)
- Comparison ops
- vm.cmp.eq.i32 (VM::CmpEQI32Op)
- vm.cmp.eq.i64 (VM::CmpEQI64Op)
- vm.cmp.gte.i32.s (VM::CmpGTEI32SOp)
- vm.cmp.gte.i32.u (VM::CmpGTEI32UOp)
- vm.cmp.gte.i64.s (VM::CmpGTEI64SOp)
- vm.cmp.gte.i64.u (VM::CmpGTEI64UOp)
- vm.cmp.gt.i32.s (VM::CmpGTI32SOp)
- vm.cmp.gt.i32.u (VM::CmpGTI32UOp)
- vm.cmp.gt.i64.s (VM::CmpGTI64SOp)
- vm.cmp.gt.i64.u (VM::CmpGTI64UOp)
- vm.cmp.lte.i32.s (VM::CmpLTEI32SOp)
- vm.cmp.lte.i32.u (VM::CmpLTEI32UOp)
- vm.cmp.lte.i64.s (VM::CmpLTEI64SOp)
- vm.cmp.lte.i64.u (VM::CmpLTEI64UOp)
- vm.cmp.lt.i32.s (VM::CmpLTI32SOp)
- vm.cmp.lt.i32.u (VM::CmpLTI32UOp)
- vm.cmp.lt.i64.s (VM::CmpLTI64SOp)
- vm.cmp.lt.i64.u (VM::CmpLTI64UOp)
- vm.cmp.ne.i32 (VM::CmpNEI32Op)
- vm.cmp.ne.i64 (VM::CmpNEI64Op)
- vm.cmp.nz.i32 (VM::CmpNZI32Op)
- vm.cmp.nz.i64 (VM::CmpNZI64Op)
- Conditional assignment ops
- vm.select.f32 (VM::SelectF32Op)
- vm.select.f64 (VM::SelectF64Op)
- vm.select.i32 (VM::SelectI32Op)
- vm.select.i64 (VM::SelectI64Op)
- vm.select.ref (VM::SelectRefOp)
- vm.switch.f32 (VM::SwitchF32Op)
- vm.switch.f64 (VM::SwitchF64Op)
- vm.switch.i32 (VM::SwitchI32Op)
- vm.switch.i64 (VM::SwitchI64Op)
- vm.switch.ref (VM::SwitchRefOp)
- Constant ops
- vm.const.f32 (VM::ConstF32Op)
- vm.const.f32.zero (VM::ConstF32ZeroOp)
- vm.const.f64 (VM::ConstF64Op)
- vm.const.f64.zero (VM::ConstF64ZeroOp)
- vm.const.i32 (VM::ConstI32Op)
- vm.const.i32.zero (VM::ConstI32ZeroOp)
- vm.const.i64 (VM::ConstI64Op)
- vm.const.i64.zero (VM::ConstI64ZeroOp)
- vm.const.ref.rodata (VM::ConstRefRodataOp)
- vm.const.ref.zero (VM::ConstRefZeroOp)
- vm.rodata.inline (VM::RodataInlineOp)
- vm.rodata (VM::RodataOp)
- Control flow ops
- vm.br (VM::BranchOp)
- vm.br_table (VM::BranchTableOp)
- vm.call (VM::CallOp)
- vm.call.variadic (VM::CallVariadicOp)
- vm.check.eq (VM::CheckEQOp)
- vm.check.ne (VM::CheckNEOp)
- vm.check.nz (VM::CheckNZOp)
- vm.check.nearly_eq (VM::CheckNearlyEQOp)
- vm.cond_br (VM::CondBranchOp)
- vm.cond_fail (VM::CondFailOp)
- vm.fail (VM::FailOp)
- vm.import.resolved (VM::ImportResolvedOp)
- vm.return (VM::ReturnOp)
- Debugging ops
- vm.break (VM::BreakOp)
- vm.cond_break (VM::CondBreakOp)
- vm.print (VM::PrintOp)
- vm.trace (VM::TraceOp)
- Floating-point arithmetic ops
- vm.abs.f32 (VM::AbsF32Op)
- vm.abs.f64 (VM::AbsF64Op)
- vm.add.f32 (VM::AddF32Op)
- vm.add.f64 (VM::AddF64Op)
- vm.ceil.f32 (VM::CeilF32Op)
- vm.ceil.f64 (VM::CeilF64Op)
- vm.div.f32 (VM::DivF32Op)
- vm.div.f64 (VM::DivF64Op)
- vm.fma.f32 (VM::FMAF32Op)
- vm.fma.f64 (VM::FMAF64Op)
- vm.floor.f32 (VM::FloorF32Op)
- vm.floor.f64 (VM::FloorF64Op)
- vm.max.f32 (VM::MaxF32Op)
- vm.max.f64 (VM::MaxF64Op)
- vm.min.f32 (VM::MinF32Op)
- vm.min.f64 (VM::MinF64Op)
- vm.mul.f32 (VM::MulF32Op)
- vm.mul.f64 (VM::MulF64Op)
- vm.neg.f32 (VM::NegF32Op)
- vm.neg.f64 (VM::NegF64Op)
- vm.rem.f32 (VM::RemF32Op)
- vm.rem.f64 (VM::RemF64Op)
- vm.round.f32.even (VM::RoundF32EvenOp)
- vm.round.f32 (VM::RoundF32Op)
- vm.round.f64.even (VM::RoundF64EvenOp)
- vm.round.f64 (VM::RoundF64Op)
- vm.sub.f32 (VM::SubF32Op)
- vm.sub.f64 (VM::SubF64Op)
- Floating-point comparison ops
- vm.cmp.eq.f32.near (VM::CmpEQF32NearOp)
- vm.cmp.eq.f32.o (VM::CmpEQF32OOp)
- vm.cmp.eq.f32.u (VM::CmpEQF32UOp)
- vm.cmp.eq.f64.near (VM::CmpEQF64NearOp)
- vm.cmp.eq.f64.o (VM::CmpEQF64OOp)
- vm.cmp.eq.f64.u (VM::CmpEQF64UOp)
- vm.cmp.gte.f32.o (VM::CmpGTEF32OOp)
- vm.cmp.gte.f32.u (VM::CmpGTEF32UOp)
- vm.cmp.gte.f64.o (VM::CmpGTEF64OOp)
- vm.cmp.gte.f64.u (VM::CmpGTEF64UOp)
- vm.cmp.gt.f32.o (VM::CmpGTF32OOp)
- vm.cmp.gt.f32.u (VM::CmpGTF32UOp)
- vm.cmp.gt.f64.o (VM::CmpGTF64OOp)
- vm.cmp.gt.f64.u (VM::CmpGTF64UOp)
- vm.cmp.lte.f32.o (VM::CmpLTEF32OOp)
- vm.cmp.lte.f32.u (VM::CmpLTEF32UOp)
- vm.cmp.lte.f64.o (VM::CmpLTEF64OOp)
- vm.cmp.lte.f64.u (VM::CmpLTEF64UOp)
- vm.cmp.lt.f32.o (VM::CmpLTF32OOp)
- vm.cmp.lt.f32.u (VM::CmpLTF32UOp)
- vm.cmp.lt.f64.o (VM::CmpLTF64OOp)
- vm.cmp.lt.f64.u (VM::CmpLTF64UOp)
- vm.cmp.ne.f32.o (VM::CmpNEF32OOp)
- vm.cmp.ne.f32.u (VM::CmpNEF32UOp)
- vm.cmp.ne.f64.o (VM::CmpNEF64OOp)
- vm.cmp.ne.f64.u (VM::CmpNEF64UOp)
- vm.cmp.nz.f32.o (VM::CmpNZF32OOp)
- vm.cmp.nz.f32.u (VM::CmpNZF32UOp)
- vm.cmp.nz.f64.o (VM::CmpNZF64OOp)
- vm.cmp.nz.f64.u (VM::CmpNZF64UOp)
- vm.cmp.nan.f32 (VM::CmpNaNF32Op)
- vm.cmp.nan.f64 (VM::CmpNaNF64Op)
- Floating-point math ops
- vm.atan2.f32 (VM::Atan2F32Op)
- vm.atan2.f64 (VM::Atan2F64Op)
- vm.atan.f32 (VM::AtanF32Op)
- vm.atan.f64 (VM::AtanF64Op)
- vm.cos.f32 (VM::CosF32Op)
- vm.cos.f64 (VM::CosF64Op)
- vm.erf.f32 (VM::ErfF32Op)
- vm.erf.f64 (VM::ErfF64Op)
- vm.exp2.f32 (VM::Exp2F32Op)
- vm.exp2.f64 (VM::Exp2F64Op)
- vm.exp.f32 (VM::ExpF32Op)
- vm.exp.f64 (VM::ExpF64Op)
- vm.expm1.f32 (VM::ExpM1F32Op)
- vm.expm1.f64 (VM::ExpM1F64Op)
- vm.log10.f32 (VM::Log10F32Op)
- vm.log10.f64 (VM::Log10F64Op)
- vm.log1p.f32 (VM::Log1pF32Op)
- vm.log1p.f64 (VM::Log1pF64Op)
- vm.log2.f32 (VM::Log2F32Op)
- vm.log2.f64 (VM::Log2F64Op)
- vm.log.f32 (VM::LogF32Op)
- vm.log.f64 (VM::LogF64Op)
- vm.pow.f32 (VM::PowF32Op)
- vm.pow.f64 (VM::PowF64Op)
- vm.rsqrt.f32 (VM::RsqrtF32Op)
- vm.rsqrt.f64 (VM::RsqrtF64Op)
- vm.sin.f32 (VM::SinF32Op)
- vm.sin.f64 (VM::SinF64Op)
- vm.sqrt.f32 (VM::SqrtF32Op)
- vm.sqrt.f64 (VM::SqrtF64Op)
- vm.tanh.f32 (VM::TanhF32Op)
- vm.tanh.f64 (VM::TanhF64Op)
- Global ops
- vm.global.address (VM::GlobalAddressOp)
- vm.global.f32 (VM::GlobalF32Op)
- vm.global.f64 (VM::GlobalF64Op)
- vm.global.i32 (VM::GlobalI32Op)
- vm.global.i64 (VM::GlobalI64Op)
- vm.global.load.f32 (VM::GlobalLoadF32Op)
- vm.global.load.f64 (VM::GlobalLoadF64Op)
- vm.global.load.i32 (VM::GlobalLoadI32Op)
- vm.global.load.i64 (VM::GlobalLoadI64Op)
- vm.global.load.indirect.f32 (VM::GlobalLoadIndirectF32Op)
- vm.global.load.indirect.f64 (VM::GlobalLoadIndirectF64Op)
- vm.global.load.indirect.i32 (VM::GlobalLoadIndirectI32Op)
- vm.global.load.indirect.i64 (VM::GlobalLoadIndirectI64Op)
- vm.global.load.indirect.ref (VM::GlobalLoadIndirectRefOp)
- vm.global.load.ref (VM::GlobalLoadRefOp)
- vm.global.ref (VM::GlobalRefOp)
- vm.global.store.f32 (VM::GlobalStoreF32Op)
- vm.global.store.f64 (VM::GlobalStoreF64Op)
- vm.global.store.i32 (VM::GlobalStoreI32Op)
- vm.global.store.i64 (VM::GlobalStoreI64Op)
- vm.global.store.indirect.f32 (VM::GlobalStoreIndirectF32Op)
- vm.global.store.indirect.f64 (VM::GlobalStoreIndirectF64Op)
- vm.global.store.indirect.i32 (VM::GlobalStoreIndirectI32Op)
- vm.global.store.indirect.i64 (VM::GlobalStoreIndirectI64Op)
- vm.global.store.indirect.ref (VM::GlobalStoreIndirectRefOp)
- vm.global.store.ref (VM::GlobalStoreRefOp)
- Integer arithmetic ops
- vm.abs.i32 (VM::AbsI32Op)
- vm.abs.i64 (VM::AbsI64Op)
- vm.add.i32 (VM::AddI32Op)
- vm.add.i64 (VM::AddI64Op)
- vm.div.i32.s (VM::DivI32SOp)
- vm.div.i32.u (VM::DivI32UOp)
- vm.div.i64.s (VM::DivI64SOp)
- vm.div.i64.u (VM::DivI64UOp)
- vm.fma.i32 (VM::FMAI32Op)
- vm.fma.i64 (VM::FMAI64Op)
- vm.max.i32.s (VM::MaxI32SOp)
- vm.max.i32.u (VM::MaxI32UOp)
- vm.max.i64.s (VM::MaxI64SOp)
- vm.max.i64.u (VM::MaxI64UOp)
- vm.min.i32.s (VM::MinI32SOp)
- vm.min.i32.u (VM::MinI32UOp)
- vm.min.i64.s (VM::MinI64SOp)
- vm.min.i64.u (VM::MinI64UOp)
- vm.mul.i32 (VM::MulI32Op)
- vm.mul.i64 (VM::MulI64Op)
- vm.rem.i32.s (VM::RemI32SOp)
- vm.rem.i32.u (VM::RemI32UOp)
- vm.rem.i64.s (VM::RemI64SOp)
- vm.rem.i64.u (VM::RemI64UOp)
- vm.sub.i32 (VM::SubI32Op)
- vm.sub.i64 (VM::SubI64Op)
- Integer bit manipulation ops
- vm.and.i32 (VM::AndI32Op)
- vm.and.i64 (VM::AndI64Op)
- vm.ctlz.i32 (VM::CtlzI32Op)
- vm.ctlz.i64 (VM::CtlzI64Op)
- vm.not.i32 (VM::NotI32Op)
- vm.not.i64 (VM::NotI64Op)
- vm.or.i32 (VM::OrI32Op)
- vm.or.i64 (VM::OrI64Op)
- vm.xor.i32 (VM::XorI32Op)
- vm.xor.i64 (VM::XorI64Op)
- List ops
- vm.list.alloc (VM::ListAllocOp)
- vm.list.get.f32 (VM::ListGetF32Op)
- vm.list.get.f64 (VM::ListGetF64Op)
- vm.list.get.i32 (VM::ListGetI32Op)
- vm.list.get.i64 (VM::ListGetI64Op)
- vm.list.get.ref (VM::ListGetRefOp)
- vm.list.reserve (VM::ListReserveOp)
- vm.list.resize (VM::ListResizeOp)
- vm.list.set.f32 (VM::ListSetF32Op)
- vm.list.set.f64 (VM::ListSetF64Op)
- vm.list.set.i32 (VM::ListSetI32Op)
- vm.list.set.i64 (VM::ListSetI64Op)
- vm.list.set.ref (VM::ListSetRefOp)
- vm.list.size (VM::ListSizeOp)
- Ref comparison ops
- vm.cmp.eq.ref (VM::CmpEQRefOp)
- vm.cmp.ne.ref (VM::CmpNERefOp)
- vm.cmp.nz.ref (VM::CmpNZRefOp)
- Structural ops
- vm.export (VM::ExportOp)
- vm.func (VM::FuncOp)
- vm.import (VM::ImportOp)
- vm.initializer (VM::InitializerOp)
- vm.module (VM::ModuleOp)
- vm.module_terminator (VM::ModuleTerminatorOp)
- Attribute definition
- OrdinalCountsAttr
"},{"location":"reference/mlir-dialects/VM/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/VM/#asyncfiber-ops","title":"Async/fiber ops","text":""},{"location":"reference/mlir-dialects/VM/#vmyield-vmyieldop","title":"vm.yield
(VM::YieldOp)","text":"Unconditional fiber yield operation
Syntax:
operation ::= `vm.yield` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n
Yields the fiber for some (likely short) amount of time. This can be used to perform cooperative scheduling and ensure fair (enough) execution. Execution resumes at the specified target branch.
^bb0: vm.yield ^on_resume ^on_resume: ...
Traits: HasParent, Terminator, Util_YieldPoint
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands","title":"Operands:","text":"Operand Description destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#bitwise-shift-and-rotate-ops","title":"Bitwise shift and rotate ops","text":""},{"location":"reference/mlir-dialects/VM/#vmshli32-vmshli32op","title":"vm.shl.i32
(VM::ShlI32Op)","text":"Integer shift left operation
Syntax:
operation ::= `vm.shl.i32` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_1","title":"Operands:","text":"Operand Description operand
32-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshli64-vmshli64op","title":"vm.shl.i64
(VM::ShlI64Op)","text":"Integer shift left operation
Syntax:
operation ::= `vm.shl.i64` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_2","title":"Operands:","text":"Operand Description operand
64-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_1","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32s-vmshri32sop","title":"vm.shr.i32.s
(VM::ShrI32SOp)","text":"Signed integer (arithmetic) shift right operation
Syntax:
operation ::= `vm.shr.i32.s` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_3","title":"Operands:","text":"Operand Description operand
32-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_2","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32u-vmshri32uop","title":"vm.shr.i32.u
(VM::ShrI32UOp)","text":"Unsigned integer (logical) shift right operation
Syntax:
operation ::= `vm.shr.i32.u` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_4","title":"Operands:","text":"Operand Description operand
32-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_3","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64s-vmshri64sop","title":"vm.shr.i64.s
(VM::ShrI64SOp)","text":"Signed integer (arithmetic) shift right operation
Syntax:
operation ::= `vm.shr.i64.s` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_5","title":"Operands:","text":"Operand Description operand
64-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_4","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64u-vmshri64uop","title":"vm.shr.i64.u
(VM::ShrI64UOp)","text":"Unsigned integer (logical) shift right operation
Syntax:
operation ::= `vm.shr.i64.u` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_6","title":"Operands:","text":"Operand Description operand
64-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_5","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbufferalloc-vmbufferallocop","title":"vm.buffer.alloc
(VM::BufferAllocOp)","text":"Allocates a new zero-initialized buffer
Syntax:
operation ::= `vm.buffer.alloc` operands attr-dict `:` type($result)\n
Allocates a new zero-initialized buffer with the given size in bytes.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_7","title":"Operands:","text":"Operand Description length
64-bit signless integer alignment
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_6","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmbufferclone-vmbuffercloneop","title":"vm.buffer.clone
(VM::BufferCloneOp)","text":"Clones a buffer
Syntax:
operation ::= `vm.buffer.clone` operands attr-dict `:` type($source_buffer) `->` type($result)\n
Clones a range of the source buffer to produce a mutable buffer with the same contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_8","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer length
64-bit signless integer alignment
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_7","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmbuffercompare-vmbuffercompareop","title":"vm.buffer.compare
(VM::BufferCompareOp)","text":"Compares a range of a buffer to another
Syntax:
operation ::= `vm.buffer.compare` operands attr-dict `:` type($lhs_buffer) `,` type($rhs_buffer)\n
Returns 1 if the two ranges are bitwise equivalent, somewhat like memcmp.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_9","title":"Operands:","text":"Operand Description lhs_buffer
ref lhs_offset
64-bit signless integer rhs_buffer
ref rhs_offset
64-bit signless integer length
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_8","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbuffercopy-vmbuffercopyop","title":"vm.buffer.copy
(VM::BufferCopyOp)","text":"Copies a range of a buffer to another
Syntax:
operation ::= `vm.buffer.copy` operands attr-dict `:` type($source_buffer) `->` type($target_buffer)\n
Copies a range of one buffer to another, like memcpy.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_10","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf32-vmbufferfillf32op","title":"vm.buffer.fill.f32
(VM::BufferFillF32Op)","text":"Fills the buffer with the given repeating 32-bit value
Syntax:
operation ::= `vm.buffer.fill.f32` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_11","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf64-vmbufferfillf64op","title":"vm.buffer.fill.f64
(VM::BufferFillF64Op)","text":"Fills the buffer with the given repeating 64-bit value
Syntax:
operation ::= `vm.buffer.fill.f64` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_12","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli16-vmbufferfilli16op","title":"vm.buffer.fill.i16
(VM::BufferFillI16Op)","text":"Fills the buffer with the given repeating 16-bit value
Syntax:
operation ::= `vm.buffer.fill.i16` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_13","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli32-vmbufferfilli32op","title":"vm.buffer.fill.i32
(VM::BufferFillI32Op)","text":"Fills the buffer with the given repeating 32-bit value
Syntax:
operation ::= `vm.buffer.fill.i32` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_14","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli64-vmbufferfilli64op","title":"vm.buffer.fill.i64
(VM::BufferFillI64Op)","text":"Fills the buffer with the given repeating 64-bit value
Syntax:
operation ::= `vm.buffer.fill.i64` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_15","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli8-vmbufferfilli8op","title":"vm.buffer.fill.i8
(VM::BufferFillI8Op)","text":"Fills the buffer with the given repeating 8-bit value
Syntax:
operation ::= `vm.buffer.fill.i8` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_16","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
8-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferlength-vmbufferlengthop","title":"vm.buffer.length
(VM::BufferLengthOp)","text":"Returns the byte length of a buffer
Syntax:
operation ::= `vm.buffer.length` operands attr-dict `:` type($buffer) `->` type($result)\n
Returns the total byte length of the given buffer. This is the exact value as specified during buffer allocation though the underlying system buffer may have additional padding.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_17","title":"Operands:","text":"Operand Description buffer
ref"},{"location":"reference/mlir-dialects/VM/#results_9","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf32-vmbufferloadf32op","title":"vm.buffer.load.f32
(VM::BufferLoadF32Op)","text":"32-bit floating-point load
Syntax:
operation ::= `vm.buffer.load.f32` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_18","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_10","title":"Results:","text":"Result Description result
32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf64-vmbufferloadf64op","title":"vm.buffer.load.f64
(VM::BufferLoadF64Op)","text":"64-bit floating-point load
Syntax:
operation ::= `vm.buffer.load.f64` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_19","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_11","title":"Results:","text":"Result Description result
64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16s-vmbufferloadi16sop","title":"vm.buffer.load.i16.s
(VM::BufferLoadI16SOp)","text":"Signed 16-bit integer load
Syntax:
operation ::= `vm.buffer.load.i16.s` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_20","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_12","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16u-vmbufferloadi16uop","title":"vm.buffer.load.i16.u
(VM::BufferLoadI16UOp)","text":"Unsigned 16-bit integer load
Syntax:
operation ::= `vm.buffer.load.i16.u` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_21","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_13","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi32-vmbufferloadi32op","title":"vm.buffer.load.i32
(VM::BufferLoadI32Op)","text":"32-bit integer load
Syntax:
operation ::= `vm.buffer.load.i32` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_22","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_14","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi64-vmbufferloadi64op","title":"vm.buffer.load.i64
(VM::BufferLoadI64Op)","text":"64-bit integer load
Syntax:
operation ::= `vm.buffer.load.i64` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_23","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_15","title":"Results:","text":"Result Description result
64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8s-vmbufferloadi8sop","title":"vm.buffer.load.i8.s
(VM::BufferLoadI8SOp)","text":"Signed 8-bit integer load
Syntax:
operation ::= `vm.buffer.load.i8.s` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_24","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_16","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8u-vmbufferloadi8uop","title":"vm.buffer.load.i8.u
(VM::BufferLoadI8UOp)","text":"Unsigned 8-bit integer load
Syntax:
operation ::= `vm.buffer.load.i8.u` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_25","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_17","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref32-vmbufferstoref32op","title":"vm.buffer.store.f32
(VM::BufferStoreF32Op)","text":"32-bit floating-point store
Syntax:
operation ::= `vm.buffer.store.f32` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_26","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref64-vmbufferstoref64op","title":"vm.buffer.store.f64
(VM::BufferStoreF64Op)","text":"64-bit floating-point store
Syntax:
operation ::= `vm.buffer.store.f64` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_27","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei16-vmbufferstorei16op","title":"vm.buffer.store.i16
(VM::BufferStoreI16Op)","text":"Unsigned 16-bit integer store
Syntax:
operation ::= `vm.buffer.store.i16` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_28","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei32-vmbufferstorei32op","title":"vm.buffer.store.i32
(VM::BufferStoreI32Op)","text":"32-bit integer store
Syntax:
operation ::= `vm.buffer.store.i32` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_29","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei64-vmbufferstorei64op","title":"vm.buffer.store.i64
(VM::BufferStoreI64Op)","text":"64-bit integer store
Syntax:
operation ::= `vm.buffer.store.i64` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_30","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei8-vmbufferstorei8op","title":"vm.buffer.store.i8
(VM::BufferStoreI8Op)","text":"Unsigned 8-bit integer store
Syntax:
operation ::= `vm.buffer.store.i8` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_31","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#casting-and-conversion-ops","title":"Casting and conversion ops","text":"Casting and type conversion/emulation.
"},{"location":"reference/mlir-dialects/VM/#vmbitcastf32i32-vmbitcastf32i32op","title":"vm.bitcast.f32.i32
(VM::BitcastF32I32Op)","text":"Bitcast from a 32-bit floating-point value to a 32-bit integer
Syntax:
operation ::= `vm.bitcast.f32.i32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_32","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_18","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcastf64i64-vmbitcastf64i64op","title":"vm.bitcast.f64.i64
(VM::BitcastF64I64Op)","text":"Bitcast from a 64-bit floating-point value to a 64-bit integer
Syntax:
operation ::= `vm.bitcast.f64.i64` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_33","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_19","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcasti32f32-vmbitcasti32f32op","title":"vm.bitcast.i32.f32
(VM::BitcastI32F32Op)","text":"Bitcast from a 32-bit integer to a 32-bit floating-point value
Syntax:
operation ::= `vm.bitcast.i32.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_34","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_20","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmbitcasti64f64-vmbitcasti64f64op","title":"vm.bitcast.i64.f64
(VM::BitcastI64F64Op)","text":"Bitcast from a 64-bit integer to a 64-bit floating-point value
Syntax:
operation ::= `vm.bitcast.i64.f64` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_35","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_21","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastanyref-vmcastanyrefop","title":"vm.cast.any.ref
(VM::CastAnyRefOp)","text":"Casts from any ref to a specific ref type
Syntax:
operation ::= `vm.cast.any.ref` $operand attr-dict `:` type($operand) `->` type($result)\n
Performs a runtime cast of an opaque !vm.ref<?>
to a specific !vm.ref<T>
and raises an error if the operand does not match the expected type. Null refs can always be cast between types.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_36","title":"Operands:","text":"Operand Description operand
ref"},{"location":"reference/mlir-dialects/VM/#results_22","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmcastf32si32-vmcastf32si32op","title":"vm.cast.f32.si32
(VM::CastF32SI32Op)","text":"Cast from a floating-point value to a signed integer
Syntax:
operation ::= `vm.cast.f32.si32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_37","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_23","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastf32ui32-vmcastf32ui32op","title":"vm.cast.f32.ui32
(VM::CastF32UI32Op)","text":"Cast from a floating-point value to an unsigned integer
Syntax:
operation ::= `vm.cast.f32.ui32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_38","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_24","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastrefany-vmcastrefanyop","title":"vm.cast.ref.any
(VM::CastRefAnyOp)","text":"Casts from a specific ref to any ref type
Syntax:
operation ::= `vm.cast.ref.any` $operand attr-dict `:` type($operand) `->` type($result)\n
Performs a compile-time widening cast of a specific !vm.ref<T>
to an opaque !vm.ref<?>
.
Traits: AlwaysSpeculatableImplTrait, VM_AssignmentOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_39","title":"Operands:","text":"Operand Description operand
ref"},{"location":"reference/mlir-dialects/VM/#results_25","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmcastsi32f32-vmcastsi32f32op","title":"vm.cast.si32.f32
(VM::CastSI32F32Op)","text":"Cast from a signed integer to a floating-point value
Syntax:
operation ::= `vm.cast.si32.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_40","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_26","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastui32f32-vmcastui32f32op","title":"vm.cast.ui32.f32
(VM::CastUI32F32Op)","text":"Cast from an unsigned integer to a floating-point value
Syntax:
operation ::= `vm.cast.ui32.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_41","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_27","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmextf32f64-vmextf32f64op","title":"vm.ext.f32.f64
(VM::ExtF32F64Op)","text":"Floating-point zero extend 32 bits to 64 bits
Syntax:
operation ::= `vm.ext.f32.f64` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_42","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_28","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexti16i32s-vmexti16i32sop","title":"vm.ext.i16.i32.s
(VM::ExtI16I32SOp)","text":"Integer sign extend 16 bits to 32 bits
Syntax:
operation ::= `vm.ext.i16.i32.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_43","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_29","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i32u-vmexti16i32uop","title":"vm.ext.i16.i32.u
(VM::ExtI16I32UOp)","text":"Integer zero extend 16 bits to 32 bits
Syntax:
operation ::= `vm.ext.i16.i32.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_44","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_30","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64s-vmexti16i64sop","title":"vm.ext.i16.i64.s
(VM::ExtI16I64SOp)","text":"Integer sign extend 16 bits to 64 bits
Syntax:
operation ::= `vm.ext.i16.i64.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_45","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_31","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64u-vmexti16i64uop","title":"vm.ext.i16.i64.u
(VM::ExtI16I64UOp)","text":"Integer zero extend 16 bits to 64 bits
Syntax:
operation ::= `vm.ext.i16.i64.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_46","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_32","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64s-vmexti32i64sop","title":"vm.ext.i32.i64.s
(VM::ExtI32I64SOp)","text":"Integer sign extend 32 bits to 64 bits
Syntax:
operation ::= `vm.ext.i32.i64.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_47","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_33","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64u-vmexti32i64uop","title":"vm.ext.i32.i64.u
(VM::ExtI32I64UOp)","text":"Integer zero extend 32 bits to 64 bits
Syntax:
operation ::= `vm.ext.i32.i64.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_48","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_34","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32s-vmexti8i32sop","title":"vm.ext.i8.i32.s
(VM::ExtI8I32SOp)","text":"Integer sign extend 8 bits to 32 bits
Syntax:
operation ::= `vm.ext.i8.i32.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_49","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_35","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32u-vmexti8i32uop","title":"vm.ext.i8.i32.u
(VM::ExtI8I32UOp)","text":"Integer zero extend 8 bits to 32 bits
Syntax:
operation ::= `vm.ext.i8.i32.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_50","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_36","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64s-vmexti8i64sop","title":"vm.ext.i8.i64.s
(VM::ExtI8I64SOp)","text":"Integer sign extend 8 bits to 64 bits
Syntax:
operation ::= `vm.ext.i8.i64.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_51","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_37","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64u-vmexti8i64uop","title":"vm.ext.i8.i64.u
(VM::ExtI8I64UOp)","text":"Integer zero extend 8 bits to 64 bits
Syntax:
operation ::= `vm.ext.i8.i64.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_52","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_38","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtruncf64f32-vmtruncf64f32op","title":"vm.trunc.f64.f32
(VM::TruncF64F32Op)","text":"Floating-point truncate to 32 bits
Syntax:
operation ::= `vm.trunc.f64.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_53","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_39","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtrunci16i8-vmtrunci16i8op","title":"vm.trunc.i16.i8
(VM::TruncI16I8Op)","text":"Integer truncate to 8 bits
Syntax:
operation ::= `vm.trunc.i16.i8` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_54","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_40","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i16-vmtrunci32i16op","title":"vm.trunc.i32.i16
(VM::TruncI32I16Op)","text":"Integer truncate to 16 bits
Syntax:
operation ::= `vm.trunc.i32.i16` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_55","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_41","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i8-vmtrunci32i8op","title":"vm.trunc.i32.i8
(VM::TruncI32I8Op)","text":"Integer truncate to 8 bits
Syntax:
operation ::= `vm.trunc.i32.i8` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_56","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_42","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i16-vmtrunci64i16op","title":"vm.trunc.i64.i16
(VM::TruncI64I16Op)","text":"Integer truncate to 16 bits
Syntax:
operation ::= `vm.trunc.i64.i16` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_57","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_43","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i32-vmtrunci64i32op","title":"vm.trunc.i64.i32
(VM::TruncI64I32Op)","text":"Integer truncate to 32 bits
Syntax:
operation ::= `vm.trunc.i64.i32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_58","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_44","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i8-vmtrunci64i8op","title":"vm.trunc.i64.i8
(VM::TruncI64I8Op)","text":"Integer truncate to 8 bits
Syntax:
operation ::= `vm.trunc.i64.i8` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_59","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_45","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#comparison-ops","title":"Comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqi32-vmcmpeqi32op","title":"vm.cmp.eq.i32
(VM::CmpEQI32Op)","text":"Integer equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.i32` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_60","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_46","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqi64-vmcmpeqi64op","title":"vm.cmp.eq.i64
(VM::CmpEQI64Op)","text":"Integer equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.i64` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_61","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_47","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32s-vmcmpgtei32sop","title":"vm.cmp.gte.i32.s
(VM::CmpGTEI32SOp)","text":"Signed integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_62","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_48","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32u-vmcmpgtei32uop","title":"vm.cmp.gte.i32.u
(VM::CmpGTEI32UOp)","text":"Unsigned integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_63","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_49","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64s-vmcmpgtei64sop","title":"vm.cmp.gte.i64.s
(VM::CmpGTEI64SOp)","text":"Signed integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_64","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_50","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64u-vmcmpgtei64uop","title":"vm.cmp.gte.i64.u
(VM::CmpGTEI64UOp)","text":"Unsigned integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_65","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_51","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32s-vmcmpgti32sop","title":"vm.cmp.gt.i32.s
(VM::CmpGTI32SOp)","text":"Signed integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_66","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_52","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32u-vmcmpgti32uop","title":"vm.cmp.gt.i32.u
(VM::CmpGTI32UOp)","text":"Unsigned integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_67","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_53","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64s-vmcmpgti64sop","title":"vm.cmp.gt.i64.s
(VM::CmpGTI64SOp)","text":"Signed integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_68","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_54","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64u-vmcmpgti64uop","title":"vm.cmp.gt.i64.u
(VM::CmpGTI64UOp)","text":"Unsigned integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_69","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_55","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32s-vmcmpltei32sop","title":"vm.cmp.lte.i32.s
(VM::CmpLTEI32SOp)","text":"Signed integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_70","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_56","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32u-vmcmpltei32uop","title":"vm.cmp.lte.i32.u
(VM::CmpLTEI32UOp)","text":"Unsigned integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_71","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_57","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64s-vmcmpltei64sop","title":"vm.cmp.lte.i64.s
(VM::CmpLTEI64SOp)","text":"Signed integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_72","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_58","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64u-vmcmpltei64uop","title":"vm.cmp.lte.i64.u
(VM::CmpLTEI64UOp)","text":"Unsigned integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_73","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_59","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32s-vmcmplti32sop","title":"vm.cmp.lt.i32.s
(VM::CmpLTI32SOp)","text":"Signed integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_74","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_60","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32u-vmcmplti32uop","title":"vm.cmp.lt.i32.u
(VM::CmpLTI32UOp)","text":"Unsigned integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_75","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_61","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64s-vmcmplti64sop","title":"vm.cmp.lt.i64.s
(VM::CmpLTI64SOp)","text":"Signed integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_76","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_62","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64u-vmcmplti64uop","title":"vm.cmp.lt.i64.u
(VM::CmpLTI64UOp)","text":"Unsigned integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_77","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_63","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei32-vmcmpnei32op","title":"vm.cmp.ne.i32
(VM::CmpNEI32Op)","text":"Integer inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.i32` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_78","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_64","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei64-vmcmpnei64op","title":"vm.cmp.ne.i64
(VM::CmpNEI64Op)","text":"Integer inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.i64` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_79","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_65","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi32-vmcmpnzi32op","title":"vm.cmp.nz.i32
(VM::CmpNZI32Op)","text":"Integer non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.i32` $operand attr-dict `:` type($operand)\n
Compares the given integer operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_80","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_66","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi64-vmcmpnzi64op","title":"vm.cmp.nz.i64
(VM::CmpNZI64Op)","text":"Integer non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.i64` $operand attr-dict `:` type($operand)\n
Compares the given integer operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_81","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_67","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#conditional-assignment-ops","title":"Conditional assignment ops","text":""},{"location":"reference/mlir-dialects/VM/#vmselectf32-vmselectf32op","title":"vm.select.f32
(VM::SelectF32Op)","text":"Floating-point select operation
Syntax:
operation ::= `vm.select.f32` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_82","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
32-bit float false_value
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_68","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmselectf64-vmselectf64op","title":"vm.select.f64
(VM::SelectF64Op)","text":"Floating-point select operation
Syntax:
operation ::= `vm.select.f64` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_83","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
64-bit float false_value
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_69","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmselecti32-vmselecti32op","title":"vm.select.i32
(VM::SelectI32Op)","text":"Integer select operation
Syntax:
operation ::= `vm.select.i32` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_84","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
32-bit signless integer false_value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_70","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselecti64-vmselecti64op","title":"vm.select.i64
(VM::SelectI64Op)","text":"Integer select operation
Syntax:
operation ::= `vm.select.i64` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_85","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
64-bit signless integer false_value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_71","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselectref-vmselectrefop","title":"vm.select.ref
(VM::SelectRefOp)","text":"Ref select operation
Syntax:
operation ::= `vm.select.ref` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_86","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
ref false_value
ref"},{"location":"reference/mlir-dialects/VM/#results_72","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmswitchf32-vmswitchf32op","title":"vm.switch.f32
(VM::SwitchF32Op)","text":"Floating-point switch operation
Syntax:
operation ::= `vm.switch.f32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.f32 %index[%c100, %c200, %c300] else %c5 : f32\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_87","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
32-bit float values
variadic of 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_73","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchf64-vmswitchf64op","title":"vm.switch.f64
(VM::SwitchF64Op)","text":"Floating-point switch operation
Syntax:
operation ::= `vm.switch.f64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.f64 %index[%c100, %c200, %c300] else %c5 : f64\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_88","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
64-bit float values
variadic of 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_74","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchi32-vmswitchi32op","title":"vm.switch.i32
(VM::SwitchI32Op)","text":"Integer switch operation
Syntax:
operation ::= `vm.switch.i32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.i32 %index[%c100, %c200, %c300] else %c5 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_89","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
32-bit signless integer values
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_75","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchi64-vmswitchi64op","title":"vm.switch.i64
(VM::SwitchI64Op)","text":"Integer switch operation
Syntax:
operation ::= `vm.switch.i64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.i64 %index[%c100, %c200, %c300] else %c5 : i64\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_90","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
64-bit signless integer values
variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_76","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchref-vmswitchrefop","title":"vm.switch.ref
(VM::SwitchRefOp)","text":"Ref switch operation
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %r0/%r1/%r2 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %null.\n%0 = vm.switch.ref %index[%r0, %r1, %r2] else %null : vm.ref<!foo>\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_91","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
ref values
variadic of ref"},{"location":"reference/mlir-dialects/VM/#results_77","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#constant-ops","title":"Constant ops","text":""},{"location":"reference/mlir-dialects/VM/#vmconstf32-vmconstf32op","title":"vm.const.f32
(VM::ConstF32Op)","text":"32-bit floating-point constant operation
Syntax:
operation ::= `vm.const.f32` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription value
FloatAttr32-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_78","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf32zero-vmconstf32zeroop","title":"vm.const.f32.zero
(VM::ConstF32ZeroOp)","text":"32-bit floating-point constant zero operation
Syntax:
operation ::= `vm.const.f32.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_79","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64-vmconstf64op","title":"vm.const.f64
(VM::ConstF64Op)","text":"64-bit floating-point constant operation
Syntax:
operation ::= `vm.const.f64` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription value
FloatAttr64-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_80","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64zero-vmconstf64zeroop","title":"vm.const.f64.zero
(VM::ConstF64ZeroOp)","text":"64-bit floating-point constant zero operation
Syntax:
operation ::= `vm.const.f64.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_81","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconsti32-vmconsti32op","title":"vm.const.i32
(VM::ConstI32Op)","text":"32-bit integer constant operation
Syntax:
operation ::= `vm.const.i32` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription value
IntegerAttr32-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_82","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti32zero-vmconsti32zeroop","title":"vm.const.i32.zero
(VM::ConstI32ZeroOp)","text":"32-bit integer constant zero operation
Syntax:
operation ::= `vm.const.i32.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_83","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64-vmconsti64op","title":"vm.const.i64
(VM::ConstI64Op)","text":"64-bit integer constant operation
Syntax:
operation ::= `vm.const.i64` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription value
IntegerAttr64-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_84","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64zero-vmconsti64zeroop","title":"vm.const.i64.zero
(VM::ConstI64ZeroOp)","text":"64-bit integer constant zero operation
Syntax:
operation ::= `vm.const.i64.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_85","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconstrefrodata-vmconstrefrodataop","title":"vm.const.ref.rodata
(VM::ConstRefRodataOp)","text":"Constant rodata access operation
Syntax:
operation ::= `vm.const.ref.rodata` $rodata attr-dict `:` type($value)\n
Returns a reference to a read-only buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription rodata
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_86","title":"Results:","text":"Result Description value
ref"},{"location":"reference/mlir-dialects/VM/#vmconstrefzero-vmconstrefzeroop","title":"vm.const.ref.zero
(VM::ConstRefZeroOp)","text":"Null ref constant operation
Syntax:
operation ::= `vm.const.ref.zero` `:` type($result) attr-dict\n
Defines a constant null ref that can be used in comparisons and initialization.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_87","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmrodatainline-vmrodatainlineop","title":"vm.rodata.inline
(VM::RodataInlineOp)","text":"Inlined constant rodata
Syntax:
operation ::= `vm.rodata.inline` ($name^)? attr-dict `:` type($result) `=` $value\n
vm.rodata that can be embedded inline in functions. See vm.rodata for more information.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription name
::mlir::StringAttrstring attribute value
::mlir::Attributebuffer-like constant attribute values alignment
::mlir::IntegerAttr64-bit signless integer attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#results_88","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmrodata-vmrodataop","title":"vm.rodata
(VM::RodataOp)","text":"Read-only data definition operation
Syntax:
operation ::= `vm.rodata` custom<SymbolVisibility>($sym_visibility) $sym_name attr-dict $value\n
Defines a blob of read-only constant data that can be represented as a ref. This can be used to store arbitrary data within modules such as large constant buffers and other file contents.
Note that the data is reference counted as a way to track its usage once the value leaves the module. For example, returning rodata from an exported function must keep the data (possibly backed by mmap) valid for its entire lifetime.
By default all rodata will be aligned in the final module output at a 16-byte granularity. An optional alignment can be specified to override the default for cases where larger or smaller alignments are needed.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute value
::mlir::Attributebuffer-like constant attribute values alignment
::mlir::IntegerAttr64-bit signless integer attribute ordinal
::mlir::IntegerAttrordinal value mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#control-flow-ops","title":"Control flow ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbr-vmbranchop","title":"vm.br
(VM::BranchOp)","text":"Unconditional branch operation
Syntax:
operation ::= `vm.br` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n
Represents an unconditional branch operation that branches to a target block with the given set of arguments.
^bb0(...): vm.br ^bb1(%a) ^bb1(%blockArg1): ...
Traits: Terminator
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_92","title":"Operands:","text":"Operand Description destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_1","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#vmbr_table-vmbranchtableop","title":"vm.br_table
(VM::BranchTableOp)","text":"Branch table operation
Syntax:
operation ::= `vm.br_table` $index ` ` `{` `\\n`\n custom<BranchTableCases>(\n $defaultDestination, $defaultOperands, type($defaultOperands),\n $caseDestinations, $caseOperands, type($caseOperands))\n `}`\n attr-dict\n
Represents a branch table instructing execution to branch to the block with the specified index. If the index is out of bounds then execution will branch to the default block.
vm.br_table %index { default: ^bb1(%a : i64), 0: ^bb2, 1: ^bb3(%c : i64) }
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Terminator
Interfaces: BranchOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription case_operand_segments
::mlir::DenseI32ArrayAttri32 dense array attribute"},{"location":"reference/mlir-dialects/VM/#operands_93","title":"Operands:","text":"Operand Description index
32-bit signless integer defaultOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref caseOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_2","title":"Successors:","text":"Successor Description defaultDestination
any successor caseDestinations
any successor"},{"location":"reference/mlir-dialects/VM/#vmcall-vmcallop","title":"vm.call
(VM::CallOp)","text":"Call operation
Syntax:
operation ::= `vm.call` $callee `(` operands `)` attr-dict `:` functional-type(operands, results)\n
Calls an internal VM function with the given arguments.
Interfaces: CallOpInterface, MemoryEffectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_94","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_89","title":"Results:","text":"Result Description results
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcallvariadic-vmcallvariadicop","title":"vm.call.variadic
(VM::CallVariadicOp)","text":"Call operation with variadic arguments
Calls an internal VM function with the given arguments. One or more of the arguments may be variadic, encoded as segmented sized operand lists.
Variadic arguments must be specified with a total count in the segment_sizes attribute.
Interfaces: CallOpInterface, MemoryEffectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
FlatSymbolRefAttrsymbol reference attribute segment_sizes
::mlir::DenseIntElementsAttr16-bit signless integer elements attribute segment_types
::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/VM/#operands_95","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_90","title":"Results:","text":"Result Description results
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckeq-vmcheckeqop","title":"vm.check.eq
(VM::CheckEQOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: Commutative, VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_96","title":"Operands:","text":"Operand Description lhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref rhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckne-vmcheckneop","title":"vm.check.ne
(VM::CheckNEOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.ne` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: Commutative, VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_97","title":"Operands:","text":"Operand Description lhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref rhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknz-vmchecknzop","title":"vm.check.nz
(VM::CheckNZOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.nz` $value (`,` $message^)? attr-dict `:` type($value)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_98","title":"Operands:","text":"Operand Description value
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknearly_eq-vmchecknearlyeqop","title":"vm.check.nearly_eq
(VM::CheckNearlyEQOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.nearly_eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: Commutative, VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_99","title":"Operands:","text":"Operand Description lhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref rhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcond_br-vmcondbranchop","title":"vm.cond_br
(VM::CondBranchOp)","text":"Conditional branch operation
Syntax:
operation ::= `vm.cond_br` $condition `,`\n $trueDest (`(` $trueDestOperands^ `:` type($trueDestOperands) `)`)? `,`\n $falseDest (`(` $falseDestOperands^ `:` type($falseDestOperands) `)`)?\n attr-dict\n
Represents a conditional branch operation that branches to one of the two target blocks with the given set of arguments.
^bb0(...): vm.cond_br %condition, ^bb1(%a), ^bb2(%b) ^bb1(%blockArg1): ... ^bb2(%blockArg2): ...
Traits: AttrSizedOperandSegments, Terminator
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_100","title":"Operands:","text":"Operand Description condition
32-bit signless integer trueDestOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref falseDestOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_3","title":"Successors:","text":"Successor Description trueDest
any successor falseDest
any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_fail-vmcondfailop","title":"vm.cond_fail
(VM::CondFailOp)","text":"Raises a global failure if the condition is true
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. It's also possible that other work that is in-flight or pending when the failure occurs will complete.
This is implemented as a pseudo-op that transforms into a vm.fail operation guarded by the condition.
%nz = vm.cmp.nz.i32 %value : i32\n%statusCode = vm.const.i32 9\nvm.cond_fail %nz, %statusCode, \"expected non-zero\"\n
Traits: VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_101","title":"Operands:","text":"Operand Description condition
32-bit signless integer status
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfail-vmfailop","title":"vm.fail
(VM::FailOp)","text":"Raises a global failure
Syntax:
operation ::= `vm.fail` $status (`,` $message^)? attr-dict\n
Signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. It's also possible that other work that is in-flight or pending when the failure occurs will complete.
%statusCode = vm.const.i32 9\nvm.fail %statusCode, \"oh no!\"\n
Traits: Terminator
Interfaces: VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_102","title":"Operands:","text":"Operand Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmimportresolved-vmimportresolvedop","title":"vm.import.resolved
(VM::ImportResolvedOp)","text":"Returns true if an optional import was resolved at runtime
Syntax:
operation ::= `vm.import.resolved` $import attr-dict `:` type($result)\n
Allows for checking whether a optional import was resolved at runtime. If this returns false then attempting to call the imported function will result in a failure at runtime.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription import
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_91","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmreturn-vmreturnop","title":"vm.return
(VM::ReturnOp)","text":"Return operation
Syntax:
operation ::= `vm.return` attr-dict ($operands^ `:` type($operands))?\n
Represents a return operation within a function.
vm.func @foo(%0: i32, %1: f8) -> (i32, f8) {\n vm.return %0, %1 : i32, f8\n}\n
Traits: AlwaysSpeculatableImplTrait, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_103","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#debugging-ops","title":"Debugging ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbreak-vmbreakop","title":"vm.break
(VM::BreakOp)","text":"Unconditional debug break operation
Syntax:
operation ::= `vm.break` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n
Breaks into the attached debugger or asks for attaching a debugger. After resuming (or if a debugger is not attached) execution will continue at the target block.
Traits: Terminator, Util_YieldPoint, VM_DebugOnly, VM_FullBarrier
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_104","title":"Operands:","text":"Operand Description destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_4","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_break-vmcondbreakop","title":"vm.cond_break
(VM::CondBreakOp)","text":"Conditional debug break operation
Syntax:
operation ::= `vm.cond_break` $condition `,` $dest (`(` $destOperands^ `:` type($destOperands) `)`)?\n attr-dict\n
Breaks into the attached debugger or asks for attaching a debugger if the provided condition is true. After resuming (or if a debugger is not attached) execution will continue at the target block.
Traits: Terminator, Util_YieldPoint, VM_DebugOnly, VM_FullBarrier
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_105","title":"Operands:","text":"Operand Description condition
32-bit signless integer destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_5","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#vmprint-vmprintop","title":"vm.print
(VM::PrintOp)","text":"Message printing operation
Syntax:
operation ::= `vm.print` $message `(` operands `)` attr-dict `:` type(operands)\n
Prints the given string message and zero or more values.
Traits: VM_DebugOnly, VM_FullBarrier
Interfaces: VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_106","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmtrace-vmtraceop","title":"vm.trace
(VM::TraceOp)","text":"Trace value(s) operation
Syntax:
operation ::= `vm.trace` $event_name `(` operands `)` attr-dict `:` type(operands)\n
Traces one or more values at the time the operation is executed. These values will be encoded into the active trace depending on the active trace verbosity setting.
Traits: VM_DebugOnly, VM_FullBarrier
Interfaces: VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription event_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_107","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#floating-point-arithmetic-ops","title":"Floating-point arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsf32-vmabsf32op","title":"vm.abs.f32
(VM::AbsF32Op)","text":"Floating point absolute-value operation
Syntax:
operation ::= `vm.abs.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_108","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_92","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmabsf64-vmabsf64op","title":"vm.abs.f64
(VM::AbsF64Op)","text":"Floating point absolute-value operation
Syntax:
operation ::= `vm.abs.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_109","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_93","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf32-vmaddf32op","title":"vm.add.f32
(VM::AddF32Op)","text":"Floating-point add operation
Syntax:
operation ::= `vm.add.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_110","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_94","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf64-vmaddf64op","title":"vm.add.f64
(VM::AddF64Op)","text":"Floating-point add operation
Syntax:
operation ::= `vm.add.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_111","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_95","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf32-vmceilf32op","title":"vm.ceil.f32
(VM::CeilF32Op)","text":"Floating point ceiling operation
Syntax:
operation ::= `vm.ceil.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_112","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_96","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf64-vmceilf64op","title":"vm.ceil.f64
(VM::CeilF64Op)","text":"Floating point ceiling operation
Syntax:
operation ::= `vm.ceil.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_113","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_97","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf32-vmdivf32op","title":"vm.div.f32
(VM::DivF32Op)","text":"Floating point division operation
Syntax:
operation ::= `vm.div.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_114","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_98","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf64-vmdivf64op","title":"vm.div.f64
(VM::DivF64Op)","text":"Floating point division operation
Syntax:
operation ::= `vm.div.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_115","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_99","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf32-vmfmaf32op","title":"vm.fma.f32
(VM::FMAF32Op)","text":"Floating point fused multiply-add operation (a*b+c)
Syntax:
operation ::= `vm.fma.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_116","title":"Operands:","text":"Operand Description a
32-bit float b
32-bit float c
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_100","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf64-vmfmaf64op","title":"vm.fma.f64
(VM::FMAF64Op)","text":"Floating point fused multiply-add operation (a*b+c)
Syntax:
operation ::= `vm.fma.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_117","title":"Operands:","text":"Operand Description a
64-bit float b
64-bit float c
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_101","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf32-vmfloorf32op","title":"vm.floor.f32
(VM::FloorF32Op)","text":"Floating point floor operation
Syntax:
operation ::= `vm.floor.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_118","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_102","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf64-vmfloorf64op","title":"vm.floor.f64
(VM::FloorF64Op)","text":"Floating point floor operation
Syntax:
operation ::= `vm.floor.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_119","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_103","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf32-vmmaxf32op","title":"vm.max.f32
(VM::MaxF32Op)","text":"Floating point maximum operation
Syntax:
operation ::= `vm.max.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_120","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_104","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf64-vmmaxf64op","title":"vm.max.f64
(VM::MaxF64Op)","text":"Floating point maximum operation
Syntax:
operation ::= `vm.max.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_121","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_105","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf32-vmminf32op","title":"vm.min.f32
(VM::MinF32Op)","text":"Floating point minimum operation
Syntax:
operation ::= `vm.min.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_122","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_106","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf64-vmminf64op","title":"vm.min.f64
(VM::MinF64Op)","text":"Floating point minimum operation
Syntax:
operation ::= `vm.min.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_123","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_107","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmulf32-vmmulf32op","title":"vm.mul.f32
(VM::MulF32Op)","text":"Floating point multiplication operation
Syntax:
operation ::= `vm.mul.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_124","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_108","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmmulf64-vmmulf64op","title":"vm.mul.f64
(VM::MulF64Op)","text":"Floating point multiplication operation
Syntax:
operation ::= `vm.mul.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_125","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_109","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf32-vmnegf32op","title":"vm.neg.f32
(VM::NegF32Op)","text":"Floating point negation operation
Syntax:
operation ::= `vm.neg.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_126","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_110","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf64-vmnegf64op","title":"vm.neg.f64
(VM::NegF64Op)","text":"Floating point negation operation
Syntax:
operation ::= `vm.neg.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_127","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_111","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf32-vmremf32op","title":"vm.rem.f32
(VM::RemF32Op)","text":"Floating point remainder operation
Syntax:
operation ::= `vm.rem.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_128","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_112","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf64-vmremf64op","title":"vm.rem.f64
(VM::RemF64Op)","text":"Floating point remainder operation
Syntax:
operation ::= `vm.rem.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_129","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_113","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32even-vmroundf32evenop","title":"vm.round.f32.even
(VM::RoundF32EvenOp)","text":"Rounds the value to the nearest even integer
Syntax:
operation ::= `vm.round.f32.even` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_130","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_114","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32-vmroundf32op","title":"vm.round.f32
(VM::RoundF32Op)","text":"Rounds the value to the nearest integer away from zero
Syntax:
operation ::= `vm.round.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_131","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_115","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64even-vmroundf64evenop","title":"vm.round.f64.even
(VM::RoundF64EvenOp)","text":"Rounds the value to the nearest even integer
Syntax:
operation ::= `vm.round.f64.even` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_132","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_116","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64-vmroundf64op","title":"vm.round.f64
(VM::RoundF64Op)","text":"Rounds the value to the nearest integer away from zero
Syntax:
operation ::= `vm.round.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_133","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_117","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsubf32-vmsubf32op","title":"vm.sub.f32
(VM::SubF32Op)","text":"Floating point subtraction operation
Syntax:
operation ::= `vm.sub.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_134","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_118","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsubf64-vmsubf64op","title":"vm.sub.f64
(VM::SubF64Op)","text":"Floating point subtraction operation
Syntax:
operation ::= `vm.sub.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_135","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_119","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#floating-point-comparison-ops","title":"Floating-point comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32near-vmcmpeqf32nearop","title":"vm.cmp.eq.f32.near
(VM::CmpEQF32NearOp)","text":"Near floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f32.near` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_136","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_120","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32o-vmcmpeqf32oop","title":"vm.cmp.eq.f32.o
(VM::CmpEQF32OOp)","text":"Ordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_137","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_121","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32u-vmcmpeqf32uop","title":"vm.cmp.eq.f32.u
(VM::CmpEQF32UOp)","text":"Unordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_138","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_122","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64near-vmcmpeqf64nearop","title":"vm.cmp.eq.f64.near
(VM::CmpEQF64NearOp)","text":"Near floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f64.near` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_139","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_123","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64o-vmcmpeqf64oop","title":"vm.cmp.eq.f64.o
(VM::CmpEQF64OOp)","text":"Ordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_140","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_124","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64u-vmcmpeqf64uop","title":"vm.cmp.eq.f64.u
(VM::CmpEQF64UOp)","text":"Unordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_141","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_125","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32o-vmcmpgtef32oop","title":"vm.cmp.gte.f32.o
(VM::CmpGTEF32OOp)","text":"Ordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_142","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_126","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32u-vmcmpgtef32uop","title":"vm.cmp.gte.f32.u
(VM::CmpGTEF32UOp)","text":"Unordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_143","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_127","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64o-vmcmpgtef64oop","title":"vm.cmp.gte.f64.o
(VM::CmpGTEF64OOp)","text":"Ordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_144","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_128","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64u-vmcmpgtef64uop","title":"vm.cmp.gte.f64.u
(VM::CmpGTEF64UOp)","text":"Unordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_145","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_129","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32o-vmcmpgtf32oop","title":"vm.cmp.gt.f32.o
(VM::CmpGTF32OOp)","text":"Ordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_146","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_130","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32u-vmcmpgtf32uop","title":"vm.cmp.gt.f32.u
(VM::CmpGTF32UOp)","text":"Unordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_147","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_131","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64o-vmcmpgtf64oop","title":"vm.cmp.gt.f64.o
(VM::CmpGTF64OOp)","text":"Ordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_148","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_132","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64u-vmcmpgtf64uop","title":"vm.cmp.gt.f64.u
(VM::CmpGTF64UOp)","text":"Unordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_149","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_133","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32o-vmcmpltef32oop","title":"vm.cmp.lte.f32.o
(VM::CmpLTEF32OOp)","text":"Ordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_150","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_134","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32u-vmcmpltef32uop","title":"vm.cmp.lte.f32.u
(VM::CmpLTEF32UOp)","text":"Unordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_151","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_135","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64o-vmcmpltef64oop","title":"vm.cmp.lte.f64.o
(VM::CmpLTEF64OOp)","text":"Ordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_152","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_136","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64u-vmcmpltef64uop","title":"vm.cmp.lte.f64.u
(VM::CmpLTEF64UOp)","text":"Unordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_153","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_137","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32o-vmcmpltf32oop","title":"vm.cmp.lt.f32.o
(VM::CmpLTF32OOp)","text":"Ordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_154","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_138","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32u-vmcmpltf32uop","title":"vm.cmp.lt.f32.u
(VM::CmpLTF32UOp)","text":"Unordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_155","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_139","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64o-vmcmpltf64oop","title":"vm.cmp.lt.f64.o
(VM::CmpLTF64OOp)","text":"Ordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_156","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_140","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64u-vmcmpltf64uop","title":"vm.cmp.lt.f64.u
(VM::CmpLTF64UOp)","text":"Unordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_157","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_141","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32o-vmcmpnef32oop","title":"vm.cmp.ne.f32.o
(VM::CmpNEF32OOp)","text":"Ordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_158","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_142","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32u-vmcmpnef32uop","title":"vm.cmp.ne.f32.u
(VM::CmpNEF32UOp)","text":"Unordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_159","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_143","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64o-vmcmpnef64oop","title":"vm.cmp.ne.f64.o
(VM::CmpNEF64OOp)","text":"Ordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_160","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_144","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64u-vmcmpnef64uop","title":"vm.cmp.ne.f64.u
(VM::CmpNEF64UOp)","text":"Unordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_161","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_145","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32o-vmcmpnzf32oop","title":"vm.cmp.nz.f32.o
(VM::CmpNZF32OOp)","text":"Ordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f32.o` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_162","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_146","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32u-vmcmpnzf32uop","title":"vm.cmp.nz.f32.u
(VM::CmpNZF32UOp)","text":"Unordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f32.u` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_163","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_147","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64o-vmcmpnzf64oop","title":"vm.cmp.nz.f64.o
(VM::CmpNZF64OOp)","text":"Ordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f64.o` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_164","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_148","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64u-vmcmpnzf64uop","title":"vm.cmp.nz.f64.u
(VM::CmpNZF64UOp)","text":"Unordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f64.u` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_165","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_149","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf32-vmcmpnanf32op","title":"vm.cmp.nan.f32
(VM::CmpNaNF32Op)","text":"Floating-point NaN comparison operation
Syntax:
operation ::= `vm.cmp.nan.f32` $operand attr-dict `:` type($operand)\n
Returns 1 if the value is NaN.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_166","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_150","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf64-vmcmpnanf64op","title":"vm.cmp.nan.f64
(VM::CmpNaNF64Op)","text":"Floating-point NaN comparison operation
Syntax:
operation ::= `vm.cmp.nan.f64` $operand attr-dict `:` type($operand)\n
Returns 1 if the value is NaN.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_167","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_151","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#floating-point-math-ops","title":"Floating-point math ops","text":"These map directly to the math
dialect.
"},{"location":"reference/mlir-dialects/VM/#vmatan2f32-vmatan2f32op","title":"vm.atan2.f32
(VM::Atan2F32Op)","text":"2-argument arcus tangent of the given values
Syntax:
operation ::= `vm.atan2.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_168","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_152","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatan2f64-vmatan2f64op","title":"vm.atan2.f64
(VM::Atan2F64Op)","text":"2-argument arcus tangent of the given values
Syntax:
operation ::= `vm.atan2.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_169","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_153","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf32-vmatanf32op","title":"vm.atan.f32
(VM::AtanF32Op)","text":"Arcus tangent of the given value
Syntax:
operation ::= `vm.atan.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_170","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_154","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf64-vmatanf64op","title":"vm.atan.f64
(VM::AtanF64Op)","text":"Arcus tangent of the given value
Syntax:
operation ::= `vm.atan.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_171","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_155","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf32-vmcosf32op","title":"vm.cos.f32
(VM::CosF32Op)","text":"Cosine of the specified value
Syntax:
operation ::= `vm.cos.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_172","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_156","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf64-vmcosf64op","title":"vm.cos.f64
(VM::CosF64Op)","text":"Cosine of the specified value
Syntax:
operation ::= `vm.cos.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_173","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_157","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff32-vmerff32op","title":"vm.erf.f32
(VM::ErfF32Op)","text":"Computes the error function of the specified value
Syntax:
operation ::= `vm.erf.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_174","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_158","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff64-vmerff64op","title":"vm.erf.f64
(VM::ErfF64Op)","text":"Computes the error function of the specified value
Syntax:
operation ::= `vm.erf.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_175","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_159","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f32-vmexp2f32op","title":"vm.exp2.f32
(VM::Exp2F32Op)","text":"Base-2 exponential of the specified value
Syntax:
operation ::= `vm.exp2.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_176","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_160","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f64-vmexp2f64op","title":"vm.exp2.f64
(VM::Exp2F64Op)","text":"Base-2 exponential of the specified value
Syntax:
operation ::= `vm.exp2.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_177","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_161","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf32-vmexpf32op","title":"vm.exp.f32
(VM::ExpF32Op)","text":"Base-e exponential of the specified value
Syntax:
operation ::= `vm.exp.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_178","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_162","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf64-vmexpf64op","title":"vm.exp.f64
(VM::ExpF64Op)","text":"Base-e exponential of the specified value
Syntax:
operation ::= `vm.exp.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_179","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_163","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f32-vmexpm1f32op","title":"vm.expm1.f32
(VM::ExpM1F32Op)","text":"Base-e exponential of the specified value minus 1
Syntax:
operation ::= `vm.expm1.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_180","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_164","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f64-vmexpm1f64op","title":"vm.expm1.f64
(VM::ExpM1F64Op)","text":"Base-e exponential of the specified value minus 1
Syntax:
operation ::= `vm.expm1.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_181","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_165","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f32-vmlog10f32op","title":"vm.log10.f32
(VM::Log10F32Op)","text":"Base-10 logarithm of the specified value
Syntax:
operation ::= `vm.log10.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_182","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_166","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f64-vmlog10f64op","title":"vm.log10.f64
(VM::Log10F64Op)","text":"Base-10 logarithm of the specified value
Syntax:
operation ::= `vm.log10.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_183","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_167","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf32-vmlog1pf32op","title":"vm.log1p.f32
(VM::Log1pF32Op)","text":"Natural logarithm of one plus the given value
Syntax:
operation ::= `vm.log1p.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_184","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_168","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf64-vmlog1pf64op","title":"vm.log1p.f64
(VM::Log1pF64Op)","text":"Natural logarithm of one plus the given value
Syntax:
operation ::= `vm.log1p.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_185","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_169","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f32-vmlog2f32op","title":"vm.log2.f32
(VM::Log2F32Op)","text":"Base-2 logarithm of the specified value
Syntax:
operation ::= `vm.log2.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_186","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_170","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f64-vmlog2f64op","title":"vm.log2.f64
(VM::Log2F64Op)","text":"Base-2 logarithm of the specified value
Syntax:
operation ::= `vm.log2.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_187","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_171","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf32-vmlogf32op","title":"vm.log.f32
(VM::LogF32Op)","text":"Base-e logarithm of the specified value
Syntax:
operation ::= `vm.log.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_188","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_172","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf64-vmlogf64op","title":"vm.log.f64
(VM::LogF64Op)","text":"Base-e logarithm of the specified value
Syntax:
operation ::= `vm.log.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_189","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_173","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf32-vmpowf32op","title":"vm.pow.f32
(VM::PowF32Op)","text":"Floating point raised to the power of operation
Syntax:
operation ::= `vm.pow.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_190","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_174","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf64-vmpowf64op","title":"vm.pow.f64
(VM::PowF64Op)","text":"Floating point raised to the power of operation
Syntax:
operation ::= `vm.pow.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_191","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_175","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf32-vmrsqrtf32op","title":"vm.rsqrt.f32
(VM::RsqrtF32Op)","text":"Reciprocal of sqrt (1 / sqrt of the specified value)
Syntax:
operation ::= `vm.rsqrt.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_192","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_176","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf64-vmrsqrtf64op","title":"vm.rsqrt.f64
(VM::RsqrtF64Op)","text":"Reciprocal of sqrt (1 / sqrt of the specified value)
Syntax:
operation ::= `vm.rsqrt.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_193","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_177","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf32-vmsinf32op","title":"vm.sin.f32
(VM::SinF32Op)","text":"Sine of the specified value
Syntax:
operation ::= `vm.sin.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_194","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_178","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf64-vmsinf64op","title":"vm.sin.f64
(VM::SinF64Op)","text":"Sine of the specified value
Syntax:
operation ::= `vm.sin.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_195","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_179","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf32-vmsqrtf32op","title":"vm.sqrt.f32
(VM::SqrtF32Op)","text":"Sqrt of the specified value
Syntax:
operation ::= `vm.sqrt.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_196","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_180","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf64-vmsqrtf64op","title":"vm.sqrt.f64
(VM::SqrtF64Op)","text":"Sqrt of the specified value
Syntax:
operation ::= `vm.sqrt.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_197","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_181","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf32-vmtanhf32op","title":"vm.tanh.f32
(VM::TanhF32Op)","text":"Hyperbolic tangent of the specified value
Syntax:
operation ::= `vm.tanh.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_198","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_182","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf64-vmtanhf64op","title":"vm.tanh.f64
(VM::TanhF64Op)","text":"Hyperbolic tangent of the specified value
Syntax:
operation ::= `vm.tanh.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_199","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_183","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/VM/#vmglobaladdress-vmglobaladdressop","title":"vm.global.address
(VM::GlobalAddressOp)","text":"Returns an address reference to a global
Syntax:
operation ::= `vm.global.address` $global attr-dict `:` type($result)\n
Returns an indirect address reference to the given global. During export the address will be converted to the natural format of the global table (for example, ordinals for refs and byte offsets for primitive types).
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), SymbolUserOpInterface, Util_GlobalAddressOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_184","title":"Results:","text":"Result Description result
32-bit signless integer or a pointer-like reference"},{"location":"reference/mlir-dialects/VM/#vmglobalf32-vmglobalf32op","title":"vm.global.f32
(VM::GlobalF32Op)","text":"32-bit floating-point global declaration
Syntax:
operation ::= `vm.global.f32` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove, VM_ExtF32
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
FloatAttr32-bit floating-point value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalf64-vmglobalf64op","title":"vm.global.f64
(VM::GlobalF64Op)","text":"64-bit floating-point global declaration
Syntax:
operation ::= `vm.global.f64` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove, VM_ExtF64
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
FloatAttr64-bit floating-point value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali32-vmglobali32op","title":"vm.global.i32
(VM::GlobalI32Op)","text":"32-bit integer global declaration
Syntax:
operation ::= `vm.global.i32` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
IntegerAttr32-bit integer value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali64-vmglobali64op","title":"vm.global.i64
(VM::GlobalI64Op)","text":"64-bit integer global declaration
Syntax:
operation ::= `vm.global.i64` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
IntegerAttr64-bit integer value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf32-vmgloballoadf32op","title":"vm.global.load.f32
(VM::GlobalLoadF32Op)","text":"Global 32-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.f32` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_185","title":"Results:","text":"Result Description value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf64-vmgloballoadf64op","title":"vm.global.load.f64
(VM::GlobalLoadF64Op)","text":"Global 64-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.f64` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_186","title":"Results:","text":"Result Description value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi32-vmgloballoadi32op","title":"vm.global.load.i32
(VM::GlobalLoadI32Op)","text":"Global 32-bit integer load operation
Syntax:
operation ::= `vm.global.load.i32` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_187","title":"Results:","text":"Result Description value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi64-vmgloballoadi64op","title":"vm.global.load.i64
(VM::GlobalLoadI64Op)","text":"Global 64-bit integer load operation
Syntax:
operation ::= `vm.global.load.i64` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_188","title":"Results:","text":"Result Description value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf32-vmgloballoadindirectf32op","title":"vm.global.load.indirect.f32
(VM::GlobalLoadIndirectF32Op)","text":"Global 32-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.indirect.f32` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF32
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_200","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<32-bit float>"},{"location":"reference/mlir-dialects/VM/#results_189","title":"Results:","text":"Result Description value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf64-vmgloballoadindirectf64op","title":"vm.global.load.indirect.f64
(VM::GlobalLoadIndirectF64Op)","text":"Global 64-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.indirect.f64` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF64
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_201","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<64-bit float>"},{"location":"reference/mlir-dialects/VM/#results_190","title":"Results:","text":"Result Description value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti32-vmgloballoadindirecti32op","title":"vm.global.load.indirect.i32
(VM::GlobalLoadIndirectI32Op)","text":"Global 32-bit integer load operation
Syntax:
operation ::= `vm.global.load.indirect.i32` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_202","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<32-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#results_191","title":"Results:","text":"Result Description value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti64-vmgloballoadindirecti64op","title":"vm.global.load.indirect.i64
(VM::GlobalLoadIndirectI64Op)","text":"Global 64-bit integer load operation
Syntax:
operation ::= `vm.global.load.indirect.i64` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_203","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<64-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#results_192","title":"Results:","text":"Result Description value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectref-vmgloballoadindirectrefop","title":"vm.global.load.indirect.ref
(VM::GlobalLoadIndirectRefOp)","text":"Global ref load operation
Syntax:
operation ::= `vm.global.load.indirect.ref` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a ref of the given type.
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_204","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#results_193","title":"Results:","text":"Result Description value
ref"},{"location":"reference/mlir-dialects/VM/#vmgloballoadref-vmgloballoadrefop","title":"vm.global.load.ref
(VM::GlobalLoadRefOp)","text":"Global ref load operation
Syntax:
operation ::= `vm.global.load.ref` $global attr-dict `:` type($value)\n
Loads the value of a global containing a ref of the given type.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_194","title":"Results:","text":"Result Description value
ref"},{"location":"reference/mlir-dialects/VM/#vmglobalref-vmglobalrefop","title":"vm.global.ref
(VM::GlobalRefOp)","text":"Ref global declaration
Syntax:
operation ::= `vm.global.ref` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n `:` $type\n
Defines a global value that is a ref of a specific type. The global will retain the ref object for the lifetime of the context or until the value is replaced with a store or reset. Initialized to null unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove
Interfaces: GlobalOpInterface, Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref32-vmglobalstoref32op","title":"vm.global.store.f32
(VM::GlobalStoreF32Op)","text":"Global 32-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.f32` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Traits: VM_ExtF32
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_205","title":"Operands:","text":"Operand Description value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref64-vmglobalstoref64op","title":"vm.global.store.f64
(VM::GlobalStoreF64Op)","text":"Global 64-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.f64` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Traits: VM_ExtF64
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_206","title":"Operands:","text":"Operand Description value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei32-vmglobalstorei32op","title":"vm.global.store.i32
(VM::GlobalStoreI32Op)","text":"Global 32-bit integer store operation
Syntax:
operation ::= `vm.global.store.i32` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_207","title":"Operands:","text":"Operand Description value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei64-vmglobalstorei64op","title":"vm.global.store.i64
(VM::GlobalStoreI64Op)","text":"Global 64-bit integer store operation
Syntax:
operation ::= `vm.global.store.i64` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_208","title":"Operands:","text":"Operand Description value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf32-vmglobalstoreindirectf32op","title":"vm.global.store.indirect.f32
(VM::GlobalStoreIndirectF32Op)","text":"Global 32-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.indirect.f32` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Traits: VM_ExtF32
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_209","title":"Operands:","text":"Operand Description value
32-bit float global
32-bit signless integer or ptr<32-bit float>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf64-vmglobalstoreindirectf64op","title":"vm.global.store.indirect.f64
(VM::GlobalStoreIndirectF64Op)","text":"Global 64-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.indirect.f64` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Traits: VM_ExtF64
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_210","title":"Operands:","text":"Operand Description value
64-bit float global
32-bit signless integer or ptr<64-bit float>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti32-vmglobalstoreindirecti32op","title":"vm.global.store.indirect.i32
(VM::GlobalStoreIndirectI32Op)","text":"Global 32-bit integer store operation
Syntax:
operation ::= `vm.global.store.indirect.i32` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_211","title":"Operands:","text":"Operand Description value
32-bit signless integer global
32-bit signless integer or ptr<32-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti64-vmglobalstoreindirecti64op","title":"vm.global.store.indirect.i64
(VM::GlobalStoreIndirectI64Op)","text":"Global 64-bit integer store operation
Syntax:
operation ::= `vm.global.store.indirect.i64` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_212","title":"Operands:","text":"Operand Description value
64-bit signless integer global
32-bit signless integer or ptr<64-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectref-vmglobalstoreindirectrefop","title":"vm.global.store.indirect.ref
(VM::GlobalStoreIndirectRefOp)","text":"Global ref store operation
Syntax:
operation ::= `vm.global.store.indirect.ref` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a ref to a global, retaining it until the global is reset.
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_213","title":"Operands:","text":"Operand Description value
ref global
32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreref-vmglobalstorerefop","title":"vm.global.store.ref
(VM::GlobalStoreRefOp)","text":"Global ref store operation
Syntax:
operation ::= `vm.global.store.ref` $value `,` $global attr-dict `:` type($value)\n
Stores a ref to a global, retaining it until the global is reset.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_214","title":"Operands:","text":"Operand Description value
ref"},{"location":"reference/mlir-dialects/VM/#integer-arithmetic-ops","title":"Integer arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsi32-vmabsi32op","title":"vm.abs.i32
(VM::AbsI32Op)","text":"Integer absolute-value operation
Syntax:
operation ::= `vm.abs.i32` $operand attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_215","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_195","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmabsi64-vmabsi64op","title":"vm.abs.i64
(VM::AbsI64Op)","text":"Integer absolute-value operation
Syntax:
operation ::= `vm.abs.i64` $operand attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_216","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_196","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi32-vmaddi32op","title":"vm.add.i32
(VM::AddI32Op)","text":"Integer add operation
Syntax:
operation ::= `vm.add.i32` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_217","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_197","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi64-vmaddi64op","title":"vm.add.i64
(VM::AddI64Op)","text":"Integer add operation
Syntax:
operation ::= `vm.add.i64` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_218","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_198","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32s-vmdivi32sop","title":"vm.div.i32.s
(VM::DivI32SOp)","text":"Signed integer division operation
Syntax:
operation ::= `vm.div.i32.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_219","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_199","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32u-vmdivi32uop","title":"vm.div.i32.u
(VM::DivI32UOp)","text":"Unsigned integer division operation
Syntax:
operation ::= `vm.div.i32.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_220","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_200","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64s-vmdivi64sop","title":"vm.div.i64.s
(VM::DivI64SOp)","text":"Signed integer division operation
Syntax:
operation ::= `vm.div.i64.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_221","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_201","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64u-vmdivi64uop","title":"vm.div.i64.u
(VM::DivI64UOp)","text":"Unsigned integer division operation
Syntax:
operation ::= `vm.div.i64.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_222","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_202","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfmai32-vmfmai32op","title":"vm.fma.i32
(VM::FMAI32Op)","text":"Integer fused-multiply add operation (a*b+c)
Syntax:
operation ::= `vm.fma.i32` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_223","title":"Operands:","text":"Operand Description a
32-bit signless integer b
32-bit signless integer c
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_203","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfmai64-vmfmai64op","title":"vm.fma.i64
(VM::FMAI64Op)","text":"Integer fused-multiply add operation (a*b+c)
Syntax:
operation ::= `vm.fma.i64` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_224","title":"Operands:","text":"Operand Description a
64-bit signless integer b
64-bit signless integer c
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_204","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32s-vmmaxi32sop","title":"vm.max.i32.s
(VM::MaxI32SOp)","text":"Signed integer maximum operation
Syntax:
operation ::= `vm.max.i32.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_225","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_205","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32u-vmmaxi32uop","title":"vm.max.i32.u
(VM::MaxI32UOp)","text":"Unsigned integer maximum operation
Syntax:
operation ::= `vm.max.i32.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_226","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_206","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64s-vmmaxi64sop","title":"vm.max.i64.s
(VM::MaxI64SOp)","text":"Signed integer maximum operation
Syntax:
operation ::= `vm.max.i64.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_227","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_207","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64u-vmmaxi64uop","title":"vm.max.i64.u
(VM::MaxI64UOp)","text":"Unsigned integer maximum operation
Syntax:
operation ::= `vm.max.i64.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_228","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_208","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32s-vmmini32sop","title":"vm.min.i32.s
(VM::MinI32SOp)","text":"Signed integer minimum operation
Syntax:
operation ::= `vm.min.i32.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_229","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_209","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32u-vmmini32uop","title":"vm.min.i32.u
(VM::MinI32UOp)","text":"Unsigned integer minimum operation
Syntax:
operation ::= `vm.min.i32.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_230","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_210","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64s-vmmini64sop","title":"vm.min.i64.s
(VM::MinI64SOp)","text":"Signed integer minimum operation
Syntax:
operation ::= `vm.min.i64.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_231","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_211","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64u-vmmini64uop","title":"vm.min.i64.u
(VM::MinI64UOp)","text":"Unsigned integer minimum operation
Syntax:
operation ::= `vm.min.i64.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_232","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_212","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli32-vmmuli32op","title":"vm.mul.i32
(VM::MulI32Op)","text":"Integer multiplication operation
Syntax:
operation ::= `vm.mul.i32` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_233","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_213","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli64-vmmuli64op","title":"vm.mul.i64
(VM::MulI64Op)","text":"Integer multiplication operation
Syntax:
operation ::= `vm.mul.i64` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_234","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_214","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32s-vmremi32sop","title":"vm.rem.i32.s
(VM::RemI32SOp)","text":"Signed integer division remainder operation
Syntax:
operation ::= `vm.rem.i32.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_235","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_215","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32u-vmremi32uop","title":"vm.rem.i32.u
(VM::RemI32UOp)","text":"Unsigned integer division remainder operation
Syntax:
operation ::= `vm.rem.i32.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_236","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_216","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64s-vmremi64sop","title":"vm.rem.i64.s
(VM::RemI64SOp)","text":"Signed integer division remainder operation
Syntax:
operation ::= `vm.rem.i64.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_237","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_217","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64u-vmremi64uop","title":"vm.rem.i64.u
(VM::RemI64UOp)","text":"Unsigned integer division remainder operation
Syntax:
operation ::= `vm.rem.i64.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_238","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_218","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi32-vmsubi32op","title":"vm.sub.i32
(VM::SubI32Op)","text":"Integer subtract operation
Syntax:
operation ::= `vm.sub.i32` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_239","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_219","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi64-vmsubi64op","title":"vm.sub.i64
(VM::SubI64Op)","text":"Integer subtract operation
Syntax:
operation ::= `vm.sub.i64` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_240","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_220","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#integer-bit-manipulation-ops","title":"Integer bit manipulation ops","text":""},{"location":"reference/mlir-dialects/VM/#vmandi32-vmandi32op","title":"vm.and.i32
(VM::AndI32Op)","text":"Integer binary and operation
Syntax:
operation ::= `vm.and.i32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_241","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_221","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmandi64-vmandi64op","title":"vm.and.i64
(VM::AndI64Op)","text":"Integer binary and operation
Syntax:
operation ::= `vm.and.i64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_242","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_222","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi32-vmctlzi32op","title":"vm.ctlz.i32
(VM::CtlzI32Op)","text":"Counts the leading zeros in an integer value
Syntax:
operation ::= `vm.ctlz.i32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_243","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_223","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi64-vmctlzi64op","title":"vm.ctlz.i64
(VM::CtlzI64Op)","text":"Counts the leading zeros in an integer value
Syntax:
operation ::= `vm.ctlz.i64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_244","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_224","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti32-vmnoti32op","title":"vm.not.i32
(VM::NotI32Op)","text":"Integer binary not operation
Syntax:
operation ::= `vm.not.i32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_245","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_225","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti64-vmnoti64op","title":"vm.not.i64
(VM::NotI64Op)","text":"Integer binary not operation
Syntax:
operation ::= `vm.not.i64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_246","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_226","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori32-vmori32op","title":"vm.or.i32
(VM::OrI32Op)","text":"Integer binary or operation
Syntax:
operation ::= `vm.or.i32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_247","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_227","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori64-vmori64op","title":"vm.or.i64
(VM::OrI64Op)","text":"Integer binary or operation
Syntax:
operation ::= `vm.or.i64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_248","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_228","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmxori32-vmxori32op","title":"vm.xor.i32
(VM::XorI32Op)","text":"Integer binary exclusive-or operation
Syntax:
operation ::= `vm.xor.i32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_249","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_229","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmxori64-vmxori64op","title":"vm.xor.i64
(VM::XorI64Op)","text":"Integer binary exclusive-or operation
Syntax:
operation ::= `vm.xor.i64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_250","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_230","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#list-ops","title":"List ops","text":""},{"location":"reference/mlir-dialects/VM/#vmlistalloc-vmlistallocop","title":"vm.list.alloc
(VM::ListAllocOp)","text":"Allocates a new empty list
Syntax:
operation ::= `vm.list.alloc` operands attr-dict `:` `(` type($initial_capacity) `)` `->` type($result)\n
Allocates a new typed list with a minimum initial_capacity.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_251","title":"Operands:","text":"Operand Description initial_capacity
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_231","title":"Results:","text":"Result Description result
list"},{"location":"reference/mlir-dialects/VM/#vmlistgetf32-vmlistgetf32op","title":"vm.list.get.f32
(VM::ListGetF32Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.f32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_252","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_232","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistgetf64-vmlistgetf64op","title":"vm.list.get.f64
(VM::ListGetF64Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.f64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_253","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_233","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistgeti32-vmlistgeti32op","title":"vm.list.get.i32
(VM::ListGetI32Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.i32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_254","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_234","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgeti64-vmlistgeti64op","title":"vm.list.get.i64
(VM::ListGetI64Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.i64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_255","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_235","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgetref-vmlistgetrefop","title":"vm.list.get.ref
(VM::ListGetRefOp)","text":"Ref type element accessor
Syntax:
operation ::= `vm.list.get.ref` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the ref value of the element at the given index. Note that the value may be null if the element is null or the type does not match.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_256","title":"Operands:","text":"Operand Description list
list index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_236","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmlistreserve-vmlistreserveop","title":"vm.list.reserve
(VM::ListReserveOp)","text":"Reserves capacity for list growth
Syntax:
operation ::= `vm.list.reserve` operands attr-dict `:` `(` type($list) `,` type($minimum_capacity) `)`\n
Reserves storage for at least minimum_capacity elements. If the list already has at least the specified capacity the operation is ignored.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_257","title":"Operands:","text":"Operand Description list
list minimum_capacity
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistresize-vmlistresizeop","title":"vm.list.resize
(VM::ListResizeOp)","text":"Resizes the list to a new count in elements
Syntax:
operation ::= `vm.list.resize` operands attr-dict `:` `(` type($list) `,` type($new_size) `)`\n
Resizes the list to contain new_size elements. This will either truncate the list if the existing size is greater than new_size or extend the list with the default list value of 0 if storing primitives and null if refs.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_258","title":"Operands:","text":"Operand Description list
list new_size
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetf32-vmlistsetf32op","title":"vm.list.set.f32
(VM::ListSetF32Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.f32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_259","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistsetf64-vmlistsetf64op","title":"vm.list.set.f64
(VM::ListSetF64Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.f64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_260","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistseti32-vmlistseti32op","title":"vm.list.set.i32
(VM::ListSetI32Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.i32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_261","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistseti64-vmlistseti64op","title":"vm.list.set.i64
(VM::ListSetI64Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.i64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_262","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetref-vmlistsetrefop","title":"vm.list.set.ref
(VM::ListSetRefOp)","text":"Ref type element mutator
Syntax:
operation ::= `vm.list.set.ref` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new ref value (possibly null).
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_263","title":"Operands:","text":"Operand Description list
list index
32-bit signless integer value
ref"},{"location":"reference/mlir-dialects/VM/#vmlistsize-vmlistsizeop","title":"vm.list.size
(VM::ListSizeOp)","text":"The size of the list in elements
Syntax:
operation ::= `vm.list.size` operands attr-dict `:` `(` type($list) `)` `->` type($result)\n
Returns the current size of the list in elements.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_264","title":"Operands:","text":"Operand Description list
list"},{"location":"reference/mlir-dialects/VM/#results_237","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#ref-comparison-ops","title":"Ref comparison ops","text":"Comparison ops for vm.ref
.
"},{"location":"reference/mlir-dialects/VM/#vmcmpeqref-vmcmpeqrefop","title":"vm.cmp.eq.ref
(VM::CmpEQRefOp)","text":"Ref equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.ref` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_265","title":"Operands:","text":"Operand Description lhs
ref rhs
ref"},{"location":"reference/mlir-dialects/VM/#results_238","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpneref-vmcmpnerefop","title":"vm.cmp.ne.ref
(VM::CmpNERefOp)","text":"Ref inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.ref` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_266","title":"Operands:","text":"Operand Description lhs
ref rhs
ref"},{"location":"reference/mlir-dialects/VM/#results_239","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzref-vmcmpnzrefop","title":"vm.cmp.nz.ref
(VM::CmpNZRefOp)","text":"Ref non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.ref` $operand attr-dict `:` type($operand)\n
Compares the given ref operand for a non-zero/null value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_267","title":"Operands:","text":"Operand Description operand
ref"},{"location":"reference/mlir-dialects/VM/#results_240","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/VM/#vmexport-vmexportop","title":"vm.export
(VM::ExportOp)","text":"Exports a function from the module
Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal functions.
Interfaces: SymbolUserOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription function_ref
::mlir::FlatSymbolRefAttrflat symbol reference attribute export_name
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmfunc-vmfuncop","title":"vm.func
(VM::FuncOp)","text":"Function defined with VM control flow ops
Represents a function containing VM ops and those of compatible dialects. All flow control is performed by VM ops.
Traits: HasParent, IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type ordinal
::mlir::IntegerAttrordinal value noinline
::mlir::UnitAttrunit attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmimport-vmimportop","title":"vm.import
(VM::ImportOp)","text":"Imports a function from an external module
Specifies a function that should be imported from either the runtime or an external VM module.
Required imports can be declared with a minimum version of the module that contains the import. The maximum declared minimum version of all required imports from the module will become the required minimum version at runtime.
Optional imports not present at runtime will be invalid to call and whether they were resolved can be queried with vm.import.resolved
.
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes sym_visibility
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrordinal value is_optional
::mlir::UnitAttrunit attribute minimum_version
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vminitializer-vminitializerop","title":"vm.initializer
(VM::InitializerOp)","text":"Global initialization function
A function that is called in definition order upon module initialization. Must not load any globals that are defined or initialized after it in the module.
Traits: HasParent, IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, Util_InitializerOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmmodule-vmmoduleop","title":"vm.module
(VM::ModuleOp)","text":"Module containing VM functions and variables
Syntax:
operation ::= `vm.module` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
Top-level container for VM functions.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute ordinal_counts
::mlir::iree_compiler::IREE::VM::OrdinalCountsAttr version
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vmmodule_terminator-vmmoduleterminatorop","title":"vm.module_terminator
(VM::ModuleTerminatorOp)","text":"Terminator pseudo-op for the module op
Syntax:
operation ::= `vm.module_terminator` attr-dict\n
Traits: HasParent, Terminator
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/VM/#ordinalcountsattr","title":"OrdinalCountsAttr","text":"Syntax:
#vm.ordinal_counts<\n int32_t, # import_funcs\n int32_t, # export_funcs\n int32_t, # internal_funcs\n int32_t, # global_bytes\n int32_t, # global_refs\n int32_t, # rodatas\n int32_t # rwdatas\n>\n
"},{"location":"reference/mlir-dialects/VM/#parameters","title":"Parameters:","text":"Parameter C++ type Description import_funcs int32_t
export_funcs int32_t
internal_funcs int32_t
global_bytes int32_t
global_refs int32_t
rodatas int32_t
rwdatas int32_t
"},{"location":"reference/mlir-dialects/VMVX/","title":"VMVX","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvx-dialect","title":"'vmvx' Dialect","text":"Vector extensions to the IREE VM.
This is a reference dialect representing a simple IREE VM-based linear algebra module that is used as a library at runtime. The ops in this dialect map (roughly) 1:1 with the exported functions in the runtime module.
See vmvx.imports.mlir
for the full list of exported functions.
- 'vmvx' Dialect
- Operation definition
- ABI ops
- vmvx.binary (VMVX::BinaryOp)
- vmvx.copy (VMVX::CopyOp)
- vmvx.fill2d (VMVX::Fill2DOp)
- vmvx.unary (VMVX::UnaryOp)
- Utility ops
- vmvx.get_buffer_descriptor (VMVX::GetBufferDescriptorOp)
- vmvx.get_raw_interface_binding_buffer (VMVX::GetRawInterfaceBindingBufferOp)
"},{"location":"reference/mlir-dialects/VMVX/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/VMVX/#abi-ops","title":"ABI ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxbinary-vmvxbinaryop","title":"vmvx.binary
(VMVX::BinaryOp)","text":"Performs a strided elementwise operation on two same-rank buffers
Syntax:
operation ::= `vmvx.binary` `op` `` `(` $opcode `:` $element_type `)`\n `lhs` `` `(` $lhs_buffer `offset` $lhs_offset `strides` `[` $lhs_strides `]` `:` type($lhs_buffer) `)`\n `rhs` `` `(` $rhs_buffer `offset` $rhs_offset `strides` `[` $rhs_strides `]` `:` type($rhs_buffer) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n `sizes` `` `(` $sizes `)`\n attr-dict\n
Performs the operation in-place as if:
OUT = OP(LHS, RHS)\n
Where OP
is a concrete operation name as defined in ukernel/elementwise.h
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/VMVX/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription opcode
::mlir::StringAttrstring attribute element_type
::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands","title":"Operands:","text":"Operand Description lhs_buffer
a reference counted byte buffer lhs_offset
index lhs_strides
variadic of index rhs_buffer
a reference counted byte buffer rhs_offset
index rhs_strides
variadic of index out_buffer
a reference counted byte buffer out_offset
index out_strides
variadic of index sizes
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxcopy-vmvxcopyop","title":"vmvx.copy
(VMVX::CopyOp)","text":"Copy from one buffer to another
Syntax:
operation ::= `vmvx.copy` `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n `sizes` `` `(` $sizes `)`\n `:` $element_type\n attr-dict\n
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/VMVX/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_1","title":"Operands:","text":"Operand Description in_buffer
a reference counted byte buffer in_offset
index in_strides
variadic of index out_buffer
a reference counted byte buffer out_offset
index out_strides
variadic of index sizes
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxfill2d-vmvxfill2dop","title":"vmvx.fill2d
(VMVX::Fill2DOp)","text":"Fill a tile with a scalar
Syntax:
operation ::= `vmvx.fill2d` `scalar` `` `(` $scalar `:` type($scalar) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `row_stride` $out_row_stride `:` type($out_buffer) `)`\n `sizes` `` `(` $m `,` $n `)`\n attr-dict\n
Fills a tile with dimensions [m, n] with a scalar.
"},{"location":"reference/mlir-dialects/VMVX/#operands_2","title":"Operands:","text":"Operand Description scalar
8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float out_buffer
a reference counted byte buffer out_offset
index out_row_stride
index m
index n
index"},{"location":"reference/mlir-dialects/VMVX/#vmvxunary-vmvxunaryop","title":"vmvx.unary
(VMVX::UnaryOp)","text":"Performs a strided elementwise unary operation
Syntax:
operation ::= `vmvx.unary` `op` `` `(` $opcode `:` $element_type `)`\n `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n `sizes` `` `(` $sizes `)`\n attr-dict\n
Performs the operation in-place as if:
OUT = OP(IN)\n
Where OP
is a concrete operation name as defined in ukernel/elementwise.h
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/VMVX/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription opcode
::mlir::StringAttrstring attribute element_type
::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_3","title":"Operands:","text":"Operand Description in_buffer
a reference counted byte buffer in_offset
index in_strides
variadic of index out_buffer
a reference counted byte buffer out_offset
index out_strides
variadic of index sizes
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxget_buffer_descriptor-vmvxgetbufferdescriptorop","title":"vmvx.get_buffer_descriptor
(VMVX::GetBufferDescriptorOp)","text":"Late binds a base buffer/offset/strides
Syntax:
operation ::= `vmvx.get_buffer_descriptor` $source `:` type($source) `->` type(results) attr-dict\n
Queries a base buffer, offset and strides. This op is late bound to its source (alloca, binding, etc), allowing additional layers of transformations to be added as lowering progresses (or for buffers to be combined).
This op has canonicalization rules which will bubble it up through the view stack. A final reconciliation pass is used explicitly to bind it to concrete sources.
Traits: AlwaysSpeculatableImplTrait, SameVariadicResultSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VMVX/#operands_4","title":"Operands:","text":"Operand Description source
memref of any type values"},{"location":"reference/mlir-dialects/VMVX/#results","title":"Results:","text":"Result Description base_buffer
a reference counted byte buffer offset
index sizes
variadic of index strides
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxget_raw_interface_binding_buffer-vmvxgetrawinterfacebindingbufferop","title":"vmvx.get_raw_interface_binding_buffer
(VMVX::GetRawInterfaceBindingBufferOp)","text":"Gets the raw buffer associated with a binding
Syntax:
operation ::= `vmvx.get_raw_interface_binding_buffer` `set` `(` $set `)` `binding` `(` $binding `)` attr-dict\n
Normally, a slice of a binding buffer is returned via hal.interface.binding.subspan. However, the normal VMVX lowering flow for this presumes that the result is a memref, and upon final conversion, it will offset the memref automatically to make it consistent.
This op is used in situations where earlier in a lowering, we have fully resolved the binding to a buffer and would just like the raw backing buffer as passed to the interface.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VMVX/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription set
::mlir::IntegerAttrindex attribute binding
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/VMVX/#results_1","title":"Results:","text":"Result Description buffer
a reference counted byte buffer"},{"location":"community/tags/","title":"Tags","text":"Website pages sorted by tag:
"},{"location":"community/tags/#android","title":"Android","text":" - Android cross-compilation
- Android LLDB debugging
"},{"location":"community/tags/#cpu","title":"CPU","text":" - RISC-V cross-compilation
- Matrix Multiplication with MMT4D
- Profiling CPUs
- CPU - Bare-Metal
- CPU
"},{"location":"community/tags/#cuda","title":"CUDA","text":" - CUDA backend
- CUDA backend design
- GPU - CUDA
"},{"location":"community/tags/#gpu","title":"GPU","text":" - CUDA backend
- Vulkan environment setup
- CUDA backend design
- Profiling GPUs using Vulkan
- GPU - CUDA
- GPU - Metal
- GPU - ROCm
- GPU - Vulkan
"},{"location":"community/tags/#jax","title":"JAX","text":" - JAX
- Extensions
- Glossary
"},{"location":"community/tags/#pytorch","title":"PyTorch","text":" - PyTorch
- Extensions
- Glossary
"},{"location":"community/tags/#python","title":"Python","text":" - JAX
- PyTorch
- TensorFlow
- TensorFlow Lite
- Python
"},{"location":"community/tags/#tensorflow","title":"TensorFlow","text":" - TFLite support via TOSA
- TensorFlow
- TensorFlow Lite
- Extensions
- Glossary
"},{"location":"community/tags/#vulkan","title":"Vulkan","text":" - Vulkan environment setup
- Profiling GPUs using Vulkan
- GPU - Vulkan
"},{"location":"community/tags/#web","title":"Web","text":" - Building with Emscripten
"},{"location":"community/tags/#ios","title":"iOS","text":" - iOS cross-compilation
- GPU - Metal
"},{"location":"community/blog/archive/2021/","title":"2021","text":""},{"location":"community/blog/category/platforms/","title":"Platforms","text":""},{"location":"community/blog/category/performance/","title":"Performance","text":""},{"location":"community/blog/category/frontends/","title":"Frontends","text":""}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"IREE","text":"IREE (Intermediate Representation Execution Environment1) is an MLIR-based end-to-end compiler and runtime that lowers Machine Learning (ML) models to a unified IR that scales up to meet the needs of the datacenter and down to satisfy the constraints and special considerations of mobile and edge deployments.
"},{"location":"#key-features","title":"Key features","text":" - Ahead-of-time compilation of scheduling and execution logic together
- Support for dynamic shapes, flow control, streaming, and other advanced model features
- Optimized for many CPU and GPU architectures
- Low overhead, pipelined execution for efficient power and resource usage
- Binary size as low as 30KB on embedded systems
- Debugging and profiling support
"},{"location":"#support-matrix","title":"Support matrix","text":"IREE supports importing from a variety of ML frameworks:
- TensorFlow
- TensorFlow Lite
- JAX
- PyTorch
- ONNX (hoped for)
The IREE compiler tools run on Linux, Windows, and macOS and can generate efficient code for a variety of runtime platforms:
- Linux
- Windows
- Android
- macOS
- iOS
- Bare metal
- WebAssembly (planned)
and architectures:
- ARM
- x86
- RISC-V
Support for hardware accelerators and APIs is also included:
- Vulkan
- CUDA
- Metal (for Apple silicon devices)
- WebGPU (planned)
"},{"location":"#project-architecture","title":"Project architecture","text":"IREE adopts a holistic approach towards ML model compilation: the IR produced contains both the scheduling logic, required to communicate data dependencies to low-level parallel pipelined hardware/API like Vulkan, and the execution logic, encoding dense computation on the hardware in the form of hardware/API-specific binaries like SPIR-V.
"},{"location":"#workflow-overview","title":"Workflow overview","text":"Using IREE involves the following general steps:
-
Import your model
Develop your program using one of the supported frameworks, then import into IREE
-
Select your deployment configuration
Identify your target platform, accelerator(s), and other constraints
-
Compile your model
Compile through IREE, picking settings based on your deployment configuration
-
Run your model
Use IREE's runtime components to execute your compiled model
"},{"location":"#importing-models-from-ml-frameworks","title":"Importing models from ML frameworks","text":"IREE supports importing models from a growing list of ML frameworks and model formats:
- TensorFlow and TensorFlow Lite
- JAX
- PyTorch
"},{"location":"#selecting-deployment-configurations","title":"Selecting deployment configurations","text":"IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE for dynamic model deployments taking advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.
- What platforms are you targeting? Desktop? Mobile? An embedded system?
- What hardware should the bulk of your model run on? CPU? GPU?
- How fixed is your model itself? Can the weights be changed? Do you want to support loading different model architectures dynamically?
IREE supports the full set of these configurations using the same underlying technology.
"},{"location":"#compiling-models","title":"Compiling models","text":"Model compilation is performed ahead-of-time on a host machine for any combination of targets. The compilation process converts from layers and operators used by high level frameworks down into optimized native code and associated scheduling logic.
For example, compiling for GPU execution using Vulkan generates SPIR-V kernels and Vulkan API calls. For CPU execution, native code with static or dynamic linkage and the associated function calls are generated.
"},{"location":"#running-models","title":"Running models","text":"IREE offers a low level C API, as well as several sets of API bindings for compiling and running programs using various languages.
"},{"location":"#communication-channels","title":"Communication channels","text":" - GitHub issues: Feature requests, bugs, and other work tracking
- IREE Discord server: Daily development discussions with the core team and collaborators
- iree-discuss email list: Announcements, general and low-priority discussion
"},{"location":"#roadmap","title":"Roadmap","text":"IREE is in the early stages of development and is not yet ready for broad adoption. We use both GitHub Projects and GitHub Milestones to track progress.
-
Pronounced \"eerie\" and often styled with the \ud83d\udc7b emoji\u00a0\u21a9
"},{"location":"building-from-source/","title":"Building from source","text":"While IREE does offer binary distributions for its compiler tools and Python bindings, building from source is still useful when using IREE's runtime or when making changes to the compiler or import tools themselves.
"},{"location":"building-from-source/#reference-pages","title":"Reference pages","text":" - Getting started
- Android cross-compilation
- iOS cross-compilation
- RISC-V cross-compilation
"},{"location":"building-from-source/android/","title":"Android cross-compilation","text":"Running on a platform like Android involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific Android version and system architecture):
- IREE's compiler is built on the host and is used there to generate modules for the target
- IREE's runtime is built on the host for the target. The runtime is then either pushed to the target to run natively or is bundled into an Android APK
","tags":["Android"]},{"location":"building-from-source/android/#prerequisites","title":"Prerequisites","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-environment-setup","title":"Host environment setup","text":"You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.
","tags":["Android"]},{"location":"building-from-source/android/#install-android-ndk-and-adb","title":"Install Android NDK and ADB","text":"The Android Native Developer Kit (NDK) is needed to use native C/C++ code on Android. You can download it here, or, if you have installed Android Studio, you can follow this guide instead.
Note
Make sure the ANDROID_NDK
environment variable is set after installing the NDK.
ADB (the Android Debug Bridge) is also needed to communicate with Android devices from the command line. Install it following the official user guide.
","tags":["Android"]},{"location":"building-from-source/android/#configure-and-build","title":"Configure and build","text":"","tags":["Android"]},{"location":"building-from-source/android/#host-configuration","title":"Host configuration","text":"Build and install on your host machine:
cmake -GNinja -B ../iree-build/ \\\n-DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n.\ncmake --build ../iree-build/ --target install\n
","tags":["Android"]},{"location":"building-from-source/android/#target-configuration","title":"Target configuration","text":"Build the runtime using the Android NDK toolchain:
Linux macOS Windows cmake -GNinja -B ../iree-build-android/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n-DANDROID_ABI=\"arm64-v8a\" \\\n-DANDROID_PLATFORM=\"android-29\" \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\ncmake --build ../iree-build-android/\n
cmake -GNinja -B ../iree-build-android/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"${ANDROID_NDK?}/build/cmake/android.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n-DANDROID_ABI=\"arm64-v8a\" \\\n-DANDROID_PLATFORM=\"android-29\" \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\ncmake --build ../iree-build-android/\n
cmake -GNinja -B ../iree-build-android/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"%ANDROID_NDK%/build/cmake/android.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=\"%CD%/../iree-build/install/bin\" \\\n-DANDROID_ABI=\"arm64-v8a\" \\\n-DANDROID_PLATFORM=\"android-29\" \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\ncmake --build ../iree-build-android/\n
Note
See the Android NDK CMake guide and Android Studio CMake guide for details on configuring CMake for Android.
The specific ANDROID_ABI
and ANDROID_PLATFORM
used should match your target device.
","tags":["Android"]},{"location":"building-from-source/android/#running-android-tests","title":"Running Android tests","text":"Make sure you enable developer options and USB debugging on your Android device and can see it when you run adb devices
, then run all tests through ctest:
# Build test dependencies\ncmake --build ../iree-build-android/ --target iree-test-deps\n\n# Ensure that your Android device is visible\nadb devices\n\n# Run tests\nctest --test-dir ../iree-build-android/ --output-on-failure\n
This will automatically upload build artifacts to the connected Android device, run the tests, then report the status back to your host machine.
","tags":["Android"]},{"location":"building-from-source/android/#running-tools-directly","title":"Running tools directly","text":"Invoke the host compiler tools to produce a bytecode module FlatBuffer:
../iree-build/install/bin/iree-compile \\\n--iree-hal-target-backends=vmvx \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
Push the Android runtime tools to the device, along with any FlatBuffer files:
adb push ../iree-build-android/tools/iree-run-module /data/local/tmp/\nadb shell chmod +x /data/local/tmp/iree-run-module\nadb push /tmp/simple_abs_vmvx.vmfb /data/local/tmp/\n
Run the tool:
adb shell /data/local/tmp/iree-run-module --device=local-task \\\n--module=/data/local/tmp/simple_abs_vmvx.vmfb \\\n--function=abs \\\n--input=\"f32=-5\"\n
","tags":["Android"]},{"location":"building-from-source/getting-started/","title":"Getting started","text":""},{"location":"building-from-source/getting-started/#prerequisites","title":"Prerequisites","text":"IREE can be built from source using CMake. We also recommend the Ninja CMake generator and the clang or MSVC C/C++ compilers.
Note - Other CMake generators and compilers IREE developers and CIs primarily use Ninja, clang, and MSVC. Other configurations (including the Makefile generator and gcc) are \"best effort\". Patches to improve support are always welcome.
Linux macOS Windows -
Install a compiler/linker (typically \"clang\" and \"lld\" package)
-
Install CMake (typically \"cmake\" package)
-
Install Ninja (typically \"ninja-build\" package)
On Debian/Ubuntu:
sudo apt install cmake ninja-build clang lld\n
-
Install CMake
-
Install Ninja
If using Homebrew:
brew install cmake ninja\n
-
Install MSVC from Visual Studio or \"Tools for Visual Studio\" on the official downloads page
-
Install CMake from the official downloads page
-
Install Ninja from the official site
Note
Initialize MSVC by running vcvarsall.bat
to build on the command line. See the official documentation for details.
"},{"location":"building-from-source/getting-started/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":"Use Git to clone the IREE repository and initialize its submodules:
git clone https://github.com/openxla/iree.git\ncd iree\ngit submodule update --init\n
The most basic CMake workflow is:
# Configure\ncmake -G Ninja -B ../iree-build/ .\n\n# Build\ncmake --build ../iree-build/\n
Caution - slow builds
The compiler build is complex. You will want a powerful machine and to tune the settings following the next section. In 2023, we've seen builds take around 5-10 minutes on 64-core Linux machines.
Use case permitting, disabling the compiler build with -DIREE_BUILD_COMPILER=OFF
will drastically simplify the build.
"},{"location":"building-from-source/getting-started/#configuration-settings","title":"Configuration settings","text":"The configure step should be customized for your build environment. These settings can improve compile and link times substantially.
Linux macOS Windows # Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DIREE_ENABLE_ASSERTIONS=ON \\\n-DIREE_ENABLE_SPLIT_DWARF=ON \\\n-DIREE_ENABLE_THIN_ARCHIVES=ON \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DIREE_ENABLE_LLD=ON\n
# Recommended development options using clang and lld:\ncmake -G Ninja -B ../iree-build/ -S . \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DIREE_ENABLE_ASSERTIONS=ON \\\n-DIREE_ENABLE_SPLIT_DWARF=ON \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DIREE_ENABLE_LLD=ON\n
It is also possible to add -DIREE_ENABLE_THIN_ARCHIVES=ON
if the CMAKE_AR
variable is defined and points to the path of either the GNU binutils or LLVM ar
program, overriding the default Apple ar
.
# Recommended development options:\ncmake -G Ninja -B ../iree-build/ -S . \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DIREE_ENABLE_ASSERTIONS=ON\n
Tip - CMAKE_BUILD_TYPE values We recommend using the RelWithDebInfo
build type by default for a good balance of debug info and performance. The Debug
, Release
, and MinSizeRel
build types are useful in more specific cases. Note that several useful LLVM debugging features are only available in Debug
builds. See the official CMake documentation for general details.
Tip - Faster recompilation with ccache We recommend using ccache
with CMake, especially when rebuilding the compiler. To use it, configure CMake with:
-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache\n
See also our developer documentation for ccache.
"},{"location":"building-from-source/getting-started/#optional-components","title":"Optional components","text":"By default, the CMake build includes:
- All compiler targets (
llvm-cpu
, cuda
, vulkan-spirv
, etc.) - All runtime HAL drivers (
local-task
, cuda
, vulkan
, etc.) - All compiler input formats (StableHLO, TOSA, etc.)
- All compiler output formats (VM bytecode, C)
The default build does not include:
- Compiler or runtime bindings (Python, TFLite, etc.)
- Advanced features like AddressSanitizer or tracing instrumentation
- Experimental components
These can be changed via the IREE_
CMake options listed in the root CMakeLists.txt
.
"},{"location":"building-from-source/getting-started/#extensions-and-integrations","title":"Extensions and integrations","text":"When using IREE within other projects, you can register compiler plugins and runtime HAL drivers. You can also bring your own copy of LLVM and some other tools. See the root CMakeLists.txt
for details.
"},{"location":"building-from-source/getting-started/#tests-and-samples","title":"Tests and samples","text":""},{"location":"building-from-source/getting-started/#running-tests","title":"Running tests","text":"Tests are run via ctest. To build and run the core project tests:
# Build default targets\ncmake --build ../iree-build/\n\n# Run tests\nctest --test-dir ../iree-build/\n
Caution
This has two limitations:
- Large tests are excluded from the build by default
- Some tests require hardware like a GPU and will fail on unsupported systems
To build and then run all tests:
# 1. Build default targets\ncmake --build ../iree-build/\n\n# 2. Build test dependencies\ncmake --build ../iree-build/ --target iree-test-deps\n\n# 3. Run tests\nctest --test-dir ../iree-build/\n\n\n# Or combine all steps using a utility target\ncmake --build ../iree-build --target iree-run-tests\n
To run only certain tests, we have a helper script that converts environment variables into ctest filters:
# Run default tests\n./build_tools/cmake/ctest_all.sh ../iree-build\n\n# Run tests, turning CUDA on and Vulkan off\nexport IREE_CUDA_DISABLE=0\nexport IREE_VULKAN_DISABLE=1\n./build_tools/cmake/ctest_all.sh ../iree-build\n
"},{"location":"building-from-source/getting-started/#running-samples","title":"Running samples","text":"# Build\ncmake --build ../iree-build/\n\n# Run a standalone sample application\n../iree-build/runtime/src/iree/runtime/demo/hello_world_embedded\n# 4xf32=1 1.1 1.2 1.3\n# *\n# 4xf32=10 100 1000 10000\n# =\n# 4xf32=10 110 1200 13000\n\n# Try out the developer tools\nls ../iree-build/tools/\n../iree-build/tools/iree-compile --help\n../iree-build/tools/iree-run-module --help\n
"},{"location":"building-from-source/getting-started/#python-bindings","title":"Python bindings","text":"Python packages can either be built from source or installed from our releases. See the Python bindings page for details about the bindings themselves.
"},{"location":"building-from-source/getting-started/#dependencies","title":"Dependencies","text":"You will need a recent Python installation >=3.9 (we aim to support non-eol Python versions).
Tip - Managing Python versions Make sure your 'python' is what you expect:
Linux macOS Windows Note that on multi-python systems, this may have a version suffix, and on many Linuxes where python2 and python3 can co-exist, you may also want to use python3
.
which python\npython --version\n
Note that on multi-python systems, this may have a version suffix, and on macOS where python2 and python3 can co-exist, you may also want to use python3
.
which python\npython --version\n
The Python launcher for Windows (py
) can help manage versions.
which python\npython --version\npy --list-paths\n
Tip - Virtual environments We recommend using virtual environments to manage python packages, such as through venv
(about, tutorial):
Linux macOS Windows python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\n.venv\\Scripts\\activate.bat\n
When done, run deactivate
.
# Upgrade PIP before installing other requirements\npython -m pip install --upgrade pip\n\n# Install IREE build requirements\npython -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
"},{"location":"building-from-source/getting-started/#building-with-cmake","title":"Building with CMake","text":"To build the Python bindings, configure CMake with the IREE_BUILD_PYTHON_BINDINGS
option. We also recommend explicitly setting which Python executable to use with Python3_EXECUTABLE
:
# Configure (including other options as discussed above)\ncmake -G Ninja -B ../iree-build/ \\\n-DIREE_BUILD_PYTHON_BINDINGS=ON \\\n-DPython3_EXECUTABLE=\"$(which python)\" \\\n.\n\n# Build\ncmake --build ../iree-build/\n
"},{"location":"building-from-source/getting-started/#using-the-python-bindings","title":"Using the Python bindings","text":"Extend your PYTHONPATH
with IREE's bindings/python
paths and try importing:
Linux macOS Windows source ../iree-build/.env && export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n# iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n
source ../iree-build/.env && export PYTHONPATH\n# The 'PYTHONPATH' environment variable should now contain\n# iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n
..\\iree-build\\.env.ps1 # or ..\\iree-build\\.env.bat\n# The 'PYTHONPATH' environment variable should now contain\n# iree-build/compiler/bindings/python;iree-build/runtime/bindings/python\n\npython -c \"import iree.compiler; help(iree.compiler)\"\npython -c \"import iree.runtime; help(iree.runtime)\"\n
Using IREE's ML framework importers requires a few extra steps:
# Install test requirements\npython -m pip install -r integrations/tensorflow/test/requirements.txt\n\n# Install pure Python packages (no build required)\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n\n# Then test the tools:\niree-import-tf --help\niree-import-tflite --help\n
"},{"location":"building-from-source/ios/","title":"iOS cross-compilation","text":"Cross-compilation for iOS consists of the two steps below.
- On the macOS host, build the IREE compiler. We can run it to create IREE modules.
- Build the IREE runtime on the macOS host for iOS devices and the simulator. We can then run the IREE module on the simulator.
","tags":["iOS"]},{"location":"building-from-source/ios/#prerequisites","title":"Prerequisites","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#install-xcode-and-ios-sdk","title":"Install Xcode and iOS SDK","text":"For cross-compilation, you need Xcode. It comes with the SDKs for iOS devices and the simulator, as well as the simctl
tool for controlling the simulator from the command line.
","tags":["iOS"]},{"location":"building-from-source/ios/#host-environment-setup","title":"Host environment setup","text":"On your host platform, you should already be able to build IREE from source. Please make sure you've gone through the steps in getting started.
","tags":["iOS"]},{"location":"building-from-source/ios/#configure-and-build","title":"Configure and build","text":"","tags":["iOS"]},{"location":"building-from-source/ios/#build-the-iree-compiler-for-the-host","title":"Build the IREE compiler for the Host","text":"Build and install on your macOS host:
cmake -S . -B ../iree-build/ -GNinja \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DCMAKE_INSTALL_PREFIX=../iree-build/install\n\ncmake --build ../iree-build/ --target install\n
","tags":["iOS"]},{"location":"building-from-source/ios/#cross-compile-the-iree-runtime-for-ios","title":"Cross-compile the IREE runtime for iOS","text":"Build the runtime for the iOS Simulator.
cmake -S . -B ../build-ios-sim -GNinja \\\n-DCMAKE_SYSTEM_NAME=iOS \\\n-DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphonesimulator Path) \\\n-DCMAKE_OSX_ARCHITECTURES=arm64 \\\n-DCMAKE_SYSTEM_PROCESSOR=arm64 \\\n-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 \\\n-DCMAKE_IOS_INSTALL_COMBINED=YES \\\n-DIREE_HOST_BIN_DIR=\"$PWD/../iree-build/install/bin\" \\\n-DCMAKE_INSTALL_PREFIX=../build-ios-sim/install \\\n-DIREE_BUILD_COMPILER=OFF\n\ncmake --build ../build-ios-sim --config Release --target install\n
Or, we can build the runtime for iOS devices by changing the value of the -DCMAKE_OSX_SYSROOT
option to:
-DCMAKE_OSX_SYSROOT=$(xcodebuild -version -sdk iphoneos Path)\n
","tags":["iOS"]},{"location":"building-from-source/ios/#running-iree-modules-on-the-ios-simulator","title":"Running IREE modules on the iOS Simulator","text":"Run the IREE compiler on the host to generate a module.
../iree-build/install/bin/iree-compile \\\n--iree-hal-target-backends=vmvx \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
We could test the generated module by running the macOS version of iree-run-module
on the host.
../iree-build/install/bin/iree-run-module \\\n--module=/tmp/simple_abs_vmvx.vmfb \\\n--device=local-task \\\n--function=abs \\\n--input=\"f32=-5\"\n
To run it on the iOS simulator, we need to copy the vmfb file into the iree-run-module
iOS app bundle.
cp /tmp/simple_abs_vmvx.vmfb \\\n../build-ios-sim/install/bin/iree-run-module.app/\n
Open the iOS Simulator Manager on the host.
open -a Simulator\n
After creating and booting a simulator in this app, you can list it from the command-line.
xcrun simctl list devices | grep Booted\n
This is what should come out of the command:
iPhone 14 Pro (12341234-ABCD-ABCD-ABCD-123412341234) (Booted)\n
where iPhone 14 Pro
is the device being simulated and 12341234-ABCD-ABCD-ABCD-123412341234
is the simulator's unique device ID (UDID).
Install the app iree-run-module
on the simulator, given its UDID.
xcrun simctl install <UDID> ../build-ios-sim/install/bin/iree-run-module.app\n
Check the path to the installed bundle, where the simple_abs_vmvx.vmfb
module should be found.
ls $(xcrun simctl get_app_container <UDID> dev.iree.iree-run-module)\n
The string dev.iree.iree-run-module
is the bundle identifier of the iOS app. The CMake building process generates it and saves it in the property list (plist) file ../build-ios-sim/install/bin/iree-run-module.app/Info.plist
.
Launch the iree-run-module
app on the simulator to run the IREE module simple_abs_vmvx.vmfb
.
xcrun simctl launch --console \\\n<UDID> \\\ndev.iree.iree-run-module \\\n--device=local-task \\\n--function=abs \\\n--input=\"f32=-5\" \\\n--module=$(xcrun simctl get_app_container <UDID> dev.iree.iree-run-module)/simple_abs_vmvx.vmfb\n
","tags":["iOS"]},{"location":"building-from-source/riscv/","title":"RISC-V cross-compilation","text":"Running on a platform like RISC-V involves cross-compiling from a host platform (e.g. Linux) to a target platform (a specific RISC-V CPU architecture and operating system):
- IREE's compiler is built on the host and is used there to generate modules for the target
- IREE's runtime is built on the host for the target. The runtime is then pushed to the target to run natively.
","tags":["CPU"]},{"location":"building-from-source/riscv/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-environment-setup","title":"Host environment setup","text":"You should already be able to build IREE from source on your host platform. Please make sure you have followed the getting started steps.
","tags":["CPU"]},{"location":"building-from-source/riscv/#install-risc-v-cross-compile-toolchain-and-emulator","title":"Install RISC-V cross-compile toolchain and emulator","text":"You'll need a RISC-V LLVM compilation toolchain and a RISC-V enabled QEMU emulator.
See instructions in the following links
- Clang getting started
- RISC-V GNU toolchain
- QEMU
- RISC-V Linux QEMU
Note
The RISCV_TOOLCHAIN_ROOT
environment variable needs to be set to the root directory of the installed GNU toolchain when building the RISC-V compiler target and the runtime library.
","tags":["CPU"]},{"location":"building-from-source/riscv/#install-prebuilt-risc-v-tools-risc-v-64-bit-linux-toolchain","title":"Install prebuilt RISC-V tools (RISC-V 64-bit Linux toolchain)","text":"Execute the following script to download the prebuilt RISC-V toolchain and QEMU from the IREE root directory:
./build_tools/riscv/riscv_bootstrap.sh\n
Note
The prebuilt toolchain is built with an AlmaLinux release 8.8 docker image. It requires glibc >= 2.28 on your host machine.
","tags":["CPU"]},{"location":"building-from-source/riscv/#support-vector-extension","title":"Support vector extension","text":"For RISC-V vector extensions support, see additional instructions
","tags":["CPU"]},{"location":"building-from-source/riscv/#configure-and-build","title":"Configure and build","text":"","tags":["CPU"]},{"location":"building-from-source/riscv/#host-configuration","title":"Host configuration","text":"Build and install on your host machine:
cmake -GNinja -B ../iree-build/ \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DCMAKE_INSTALL_PREFIX=../iree-build/install \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n.\ncmake --build ../iree-build/ --target install\n
","tags":["CPU"]},{"location":"building-from-source/riscv/#target-configuration","title":"Target configuration","text":"The following instruction shows how to build for a RISC-V 64-bit Linux machine. For other RISC-V targets, please refer to riscv.toolchain.cmake as a reference of how to set up the cmake configuration.
","tags":["CPU"]},{"location":"building-from-source/riscv/#risc-v-64-bit-linux-target","title":"RISC-V 64-bit Linux target","text":"cmake -GNinja -B ../iree-build-riscv/ \\\n-DCMAKE_TOOLCHAIN_FILE=\"./build_tools/cmake/riscv.toolchain.cmake\" \\\n-DIREE_HOST_BIN_DIR=$(realpath ../iree-build/install/bin) \\\n-DRISCV_CPU=linux-riscv_64 \\\n-DIREE_BUILD_COMPILER=OFF \\\n-DRISCV_TOOLCHAIN_ROOT=${RISCV_TOOLCHAIN_ROOT} \\\n-DIREE_ENABLE_CPUINFO=OFF \\\n.\ncmake --build ../iree-build-riscv/\n
","tags":["CPU"]},{"location":"building-from-source/riscv/#running-iree-bytecode-modules-on-the-risc-v-system","title":"Running IREE bytecode modules on the RISC-V system","text":"Note
The following instructions are meant for the RISC-V 64-bit Linux target. For the bare-metal target, please refer to simple_embedding to see how to build an ML workload for a bare-metal machine.
Set the path to qemu-riscv64 Linux emulator binary in the QEMU_BIN
environment variable. If it is installed with riscv_bootstrap.sh
, the default path is ${HOME}/riscv/qemu/linux/RISCV/bin/qemu-riscv64.
export QEMU_BIN=<path to qemu-riscv64 binary>\n
Invoke the host compiler tools to produce a bytecode module FlatBuffer:
../iree-build/install/bin/iree-compile \\\n--iree-hal-target-backends=vmvx \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
Run the RISC-V emulation:
${QEMU_BIN} \\\n-cpu rv64 \\\n-L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n../iree-build-riscv/tools/iree-run-module \\\n--device=local-task \\\n--module=/tmp/simple_abs_vmvx.vmfb \\\n--function=abs \\\n--input=f32=-5\n
","tags":["CPU"]},{"location":"building-from-source/riscv/#optional-configuration","title":"Optional configuration","text":"RISC-V Vector extensions allow SIMD code to run more efficiently. To enable the vector extension for the compiler toolchain and the emulator, build the tools from the following sources:
- RISC-V toolchain is built from https://github.com/llvm/llvm-project.
- Currently, the LLVM compiler is built on GNU toolchain, including libgcc, GNU linker, and C libraries. You need to build GNU toolchain first.
- Clone GNU toolchain from: https://github.com/riscv/riscv-gnu-toolchain. Switch the \"riscv-binutils\" submodule to
git://sourceware.org/git/binutils-gdb.git
manually.
- RISC-V QEMU is built from https://gitlab.com/qemu-project/qemu/tree/v8.1.2.
The SIMD code can be generated following the IREE CPU flow with the additional command-line flags
tools/iree-compile \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-llvmcpu-target-triple=riscv64 \\\n--iree-llvmcpu-target-abi=lp64d \\\n--iree-llvmcpu-target-cpu-features=\"+m,+a,+f,+d,+zvl512b,+v\" \\\n--riscv-v-fixed-length-vector-lmul-max=8 \\\niree_input.mlir -o mobilenet_cpu.vmfb\n
Then run on the RISC-V QEMU:
${QEMU_BIN} \\\n-cpu rv64,Zve64d=true,vlen=512,elen=64,vext_spec=v1.0 \\\n-L ${RISCV_TOOLCHAIN_ROOT}/sysroot/ \\\n../iree-build-riscv/tools/iree-run-module \\\n--device=local-task \\\n--module=mobilenet_cpu.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
","tags":["CPU"]},{"location":"community/","title":"Community projects","text":"Projects built by community members:
-
The SHARK project from nod.ai uses a forked version of IREE (SHARK-Runtime), offering highly tuned performance on a large corpus of machine learning programs.
-
The IREE Bare-Metal Arm Sample shows how to build IREE with the Arm GNU Toolchain for bare-metal Arm targets using the open-source firmware libraries CMSIS and libopencm3.
-
The IREE C++ Template shows one way to integrate IREE's runtime into a project with CMake.
Official repositories:
-
iree-jax is home to IREE's support for JAX programs.
-
iree-torch contains IREE's PyTorch frontend, leveraging the torch-mlir project.
-
iree-samples includes various samples and prototypes built with IREE.
-
iree-llvm-sandbox contains experimental work by the IREE team closely related to LLVM and MLIR, usually with the aim of contributing back to those upstream projects.
"},{"location":"community/tags/","title":"Tags","text":"Website pages sorted by tag:
"},{"location":"community/tags/#android","title":"Android","text":" - Android cross-compilation
- Android LLDB debugging
"},{"location":"community/tags/#cpu","title":"CPU","text":" - RISC-V cross-compilation
- Matrix Multiplication with MMT4D
- Profiling CPUs
- CPU - Bare-Metal
- CPU
"},{"location":"community/tags/#cuda","title":"CUDA","text":" - CUDA backend
- CUDA backend design
- GPU - CUDA
"},{"location":"community/tags/#gpu","title":"GPU","text":" - CUDA backend
- Vulkan environment setup
- CUDA backend design
- Profiling GPUs using Vulkan
- GPU - CUDA
- GPU - Metal
- GPU - ROCm
- GPU - Vulkan
"},{"location":"community/tags/#jax","title":"JAX","text":" - JAX
- Extensions
- Glossary
"},{"location":"community/tags/#pytorch","title":"PyTorch","text":" - PyTorch
- Extensions
- Glossary
"},{"location":"community/tags/#python","title":"Python","text":" - JAX
- PyTorch
- TensorFlow
- TensorFlow Lite
- Python
"},{"location":"community/tags/#tensorflow","title":"TensorFlow","text":" - TFLite support via TOSA
- TensorFlow
- TensorFlow Lite
- Extensions
- Glossary
"},{"location":"community/tags/#vulkan","title":"Vulkan","text":" - Vulkan environment setup
- Profiling GPUs using Vulkan
- GPU - Vulkan
"},{"location":"community/tags/#web","title":"Web","text":" - Building with Emscripten
"},{"location":"community/tags/#ios","title":"iOS","text":" - iOS cross-compilation
- GPU - Metal
"},{"location":"community/blog/","title":"Blog","text":"Updates from the IREE team
"},{"location":"community/blog/2021-10-15-cuda-backend/","title":"CUDA backend","text":"IREE is being designed with re-targetability as a core goal: it should be possible to use IREE to target a broad spectrum of power regimes, from embedded systems to distributed clusters; and it should be possible to extend IREE to target new back-ends without having to reinvent the wheel each time.
To explore this, we recently branched out from our initial focus on low-latency mobile deployments with a goal of using IREE to target data center workloads on Nvidia CUDA. This post describes how we quickly brought up a CUDA back-end for IREE and used it to train BERT, then shares some metrics and next steps.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#bring-up","title":"Bring up","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#hal-support","title":"HAL support","text":"IREE has a HAL API that abstracts all the targets behind a common interface. The first step to supporting a CUDA target was to map the HAL API onto CUDA. We use the CUDA driver API to reduce dependencies and be closer to the hardware. The HAL API is based on other GPU APIs like Vulkan and Metal, so it was a natural fit for CUDA. The HAL API exposes memory allocations, basic fill and memset commands, kernel dispatch, and general command buffer handling. The original implementation uses the CUDA graph API as a graph maps naturally to command buffers. There is also an implementation using CUDA streams for comparison.
HAL exposes an API that can be tested independently, even if we are not able to create CUDA kernels yet we can test a large portion of the CUDA driver using CTS tests. Those can be run to make sure a system has the required CUDA support.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#compiler-support","title":"Compiler support","text":"CUDA has an open source backend in LLVM generating PTX that we are leveraging. Therefore IREE can create NVVM (CUDA LLVM variant) and use LLVM's backend to generate PTX. The CUDA driver will do the \"last mile compilation\" at runtime to convert PTX into the GPU's native ISA.
IREE compiler pipeline starts from linalg with tensor operands. A large part of the compiler is independent of the target.
The linalg on tensor representation of the graph is broken up into dispatch regions that are processed by NVVM Codegen. A simple implementation of the compiler is to run bufferization and convert linalg to standard followed by conversion to NVVM/LLVM. Most of those transformations can re-use upstream MLIR transformations and share them with any other backend targeting LLVM IR. Leveraging MLIR conversion to LLVM will allow us to quickly go from a simple \"hello world\" to supporting full models.
IREE code generation is based on MLIR infrastructure so each step can easily be tested independently using the MLIR lit framework.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#flatbuffer-definition","title":"FlatBuffer definition","text":"Kernels are encoded in a FlatBuffer containing the PTX code as well as the workgroup size to use for the dispatch. This allows serialization of the kernels in the IR, it is then de-serialized by the HAL layer.
table CUDAExecutableDef {\n // A map of entry point ordinals to string names as used in the shader\n // library.\n entry_points:[string];\n\n // Block sizes for each entry point.\n block_sizes:[CUDABlockSizeDef];\n\n // PTX string of the module.\n ptx_image:string;\n}\n
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#hello-world","title":"Hello world","text":"Together those 3 steps are enough to provide most of the functionality and we can now successfully compile full models.
The steps to reproduce running a simple op end to end through CUDA backend are described here.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#performance","title":"Performance","text":"Now that we have enabled functionality we need to look at the performance. Once again we can leverage existing MLIR transformations to speed up the development work.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#tiling-and-distribution","title":"Tiling and distribution","text":"The first obvious step to get efficient code on CUDA is to make sure we distribute the work on enough blocks and threads to fill up the GPU. At the time of bring up not all ops were being tiled and distributed in the common IREE layer. During dispatch region creation we apply tile and fuse which will distribute the work into a set of workgroups that are mapped to CUDA blocks.
At the beginning of the code generation we look at the dispatch region and decide on the tile size for a workgroup. For CUDA we also decide the number of threads per block. We will then have a pass tiling the ops in the dispatch region a second time to distribute the work onto threads within the block.
At this stage the IR looks like the following:
%8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -> index\n %9 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%8]\n %10 = memref.subview %in0[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %11 = memref.subview %in1[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %12 = memref.subview %out[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n linalg.generic {\n indexing_maps = [affine_map<(d0) -> (d0)>,\n affine_map<(d0) -> (d0)>,\n affine_map<(d0) -> (d0)>],\n iterator_types = [\"parallel\"]}\n ins(%10, %11 :\n memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>,\n memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>)\n outs(%12 : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>) {\n ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors\n %13 = addf %arg1, %arg2 : f32\n linalg.yield %13 : f32\n }\n
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#vectorization","title":"Vectorization","text":"Even though GPUs execute most operations as scalar, memory operations are optimized to access 128 bits of data per thread. Therefore it is critical to vectorize load/store operations. After tiling to a size we vectorize the IR to get vector read/write mapping to load4/store4. This significantly improves the memory access pattern of the code generated.
This converts the previous IR to:
%8 = \"gpu.thread_id\"() {dimension = \"x\"} : () -> index\n %9 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%8]\n %10 = memref.subview %in0[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %11 = memref.subview %in1[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %12 = memref.subview %out[%9] [4] [1] : memref<128xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n %13 = vector.transfer_read %10[%c0], %cst {in_bounds = [true]} : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>, vector<4xf32>\n %14 = vector.transfer_read %11[%c0], %cst {in_bounds = [true]} : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>, vector<4xf32>\n %15 = addf %13, %14 : vector<4xf32>\n vector.transfer_write %15, %12[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>\n
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#shared-memory-optimization","title":"Shared memory optimization","text":"Nvidia GPUs have a fast shared memory that needs to be leveraged to optimize cases where we may be memory bound and have the potential to re-use memory reads.
For operations like GEMM using shared memory gives us a significant speed up. We leverage memory promotion, vector distribution and software pipelining transformations from MLIR to generate efficient copies from global to shared memory that can be interleaved with the compute work.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#optimization-pipeline","title":"Optimization pipeline","text":"Those different transformations compose to this flow:
The full dump step by step of a linalg.matmul operation can be found here.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#results-and-next-steps","title":"Results and next steps","text":"","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#gemm","title":"GEMM","text":"We compare the performance of a single GEMM operation to highly optimized library cuBLAS using mmperf framework.
The graph can be re-produced based on instructions on mmperf
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-15-cuda-backend/#future-work","title":"Future work","text":"Nod.ai has contributed an experimental HAL module for ROCM that allows us to re-use the compiler parts to support ROCM, more support is going to be added in the future.
Several performance improvements are still in progress, including optimizing the runtime allocator to reduce the host-side overhead and tuning tile sizes based on profiling.
Several models are running and we will publish more detailed benchmark results in the near future.
","tags":["GPU","CUDA"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/","title":"Matrix Multiplication with MMT4D","text":"","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#introduction","title":"Introduction","text":"Matrix multiplication (matmul) is an important operation in ML workloads that poses specific challenges to code generation. For example, matmul makes repeated accesses to the same data, which makes locality of reference a top concern.
Moreover, modern CPU instruction set architectures (ISAs) offer specialized SIMD instructions that the matmul implementation needs to use to achieve optimal performance, and these instructions expect data to be in a particular layout.
This article is about an in-development MLIR operation, linalg.mmt4d
, offering a compilation path for linalg.matmul
that is designed from the ground up for these efficiency considerations.
We are still in the early implementation phase of this linalg.mmt4d
plan, but we feel confident that we know where we are going because what we are really doing here is importing into the compiler what we have learned working on optimized matrix multiplication libraries, particularly Ruy. We know what loop schedule and kernel we want the compiler to generate \u2014 essentially the same as we wrote in Ruy, give or take additional optimizations such as fusions and constant folding that become possible now that we are doing this within a compiler. This allows us to focus on how we get the compiler to generate that schedule and kernel with purely algebraic transformations that compose and enable further compiler optimizations.
At the basis of this work is the extensible op system of the Linalg dialect in the MLIR compiler toolkit. In this case, a general purpose, mixed precision mmt4d op is defined via a high level description directly in the compiler and is then available to both users of the compiler (as a linalg.mmt4d
op) or for direct emission via Python based IR construction (i.e. for direct integration into high level frameworks without rebuilding the compiler). The ability to define such new special forms cheaply, and without any systemic framework level cost, is part of the extensibility and composition story that we expect will become increasingly important in development and deployment scenarios in the future, and in this case, it let us spring board off of high quality code generation which was already well integrated and composed well with other features of the compiler.
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#existing-matrix-multplication-code-generation","title":"Existing Matrix Multiplication Code Generation","text":"Let us start by discussing IREE\u2019s existing matmul code generation and highlight the issues that mmt4d
aims to overcome.
The existing approach operates in-place on the source matrices. When we discuss \"tiling\" in this paragraph, we refer exclusively to the traversal \u2014 how these source matrices are traversed by the matmul loop. There is no \"tiled layout\" here, which will be the key difference with mmt4d
below.
The destination matrix is tiled into workgroups (CPU threads) tiles, then each workgroup tile is tiled to fit some level of CPU cache, and finally each tile is further tiled to fit target architecture registers (e.g. 8x8).
That multi-level tiling means that the code works like the following loop nest:
def tiled_matmul(A, B, C, tile_m, tile_n, tile_k, tile_m_v, tile_n_v, tile_k_v):\n m = A.shape[0]\n k = A.shape[1]\n n = B.shape[1]\n for m1 in range(0, m, tile_m):\n for n1 in range(0, n, tile_n):\n for k1 in range(0, k, tile_k):\n # First level of tiling views...\n lhs_tile = A[m1:m1+tile_m, k1:k1+tile_k]\n rhs_tile = B[k1:k1+tile_k, n1:n1+tile_n]\n dst_tile = C[m1:m1+tile_m, n1:n1+tile_n]\n for mv in range(0, tile_m, tile_m_v):\n for nv in range(0, tile_n, tile_n_v):\n for kv in range(0, tile_k, tile_k_v):\n # Register tiling views...\n lhs_tile_v = lhs_tile[mv:mv+tile_m_v, kv:kv+tile_k_v]\n rhs_tile_v = rhs_tile[kv:kv+tile_k_v, nv:nv+tile_n_v]\n # kernel.\n dst_tile[mv:mv+tile_m_v, nv:nv+tile_n_v] += np.matmul(lhs_tile_v, rhs_tile_v)\n return C\n
The two main problems with this approach are:
-
Overhead to meet SIMD ISA layout requirements: In practice, the kernel needs to use specific SIMD instructions to perform the arithmetic. They expect small tiles of the matrices to be loaded in registers, in a specific layout. If the matrix data wasn't already stored in memory in such a tiled layout, then the kernel has to perform such a data rearrangement on the fly, incurring substantial overhead. For NxN matrix multiplication, the kernel performs O(N^3) work on O(N^2) data, so doing that rearrangement there means O(N^3) overhead where O(N^2) should have sufficed, as this could have been done as a pre-processing step on O(N^2) data.
-
Inefficient memory traversal: For efficiency reasons, we always need tile_m_v>1
and tile_n_v>1
. That is because the higher these values, the fewer memory-load instructions are needed overall; and this is also dictated by the SIMD instructions that we want to use. But that means that the kernel is accessing simultaneously multiple rows or columns of the left-hand and right-hand side matrices. And in this existing approach, they are stored in linear layout, not in a tiled layout, so these accesses are not contiguous in memory. This is detrimental to memory access performance, meaning the CPU caches, in multiple ways. One is that these multiple non-contiguous accesses may alias each other in the L1 cache because of low associativity.
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#matrix-multiplication-operation-with-4d-tiled-operands","title":"Matrix Multiplication Operation With 4D Tiled Operands","text":"For the reasons above, an efficient matmul implementation must reorder data into a tiled layout matching the target SIMD ISA and making the memory access patterns as contiguous as possible.
IREE/MLIR defaults to bufferizing all tensors into a \"row-major\" order, meaning that the last-enumerated dimension is the one that is contiguous in memory. As we prefer not to write custom bufferization code, we can't specify an alternative layout for a tensor. Fortunately, it is possible to represent a 2D tiled layout as a 4D layout. For example, tensor<2x2x2x2xf32>
can represent a 4x4 matrix made of 2x2 tiles, each of which is 2x2. The row-major layout on tensor<2x2x2x2xf32>
makes each 2x2 tile contiguous and row-major, and arranges the 2x2 tiles themselves into a row-major 2x2 layout in the overall 4x4 matrix.
Such a row-major-tiled layout is exactly what we need for the left-hand side of a matrix multiplication, because matrix multiplication traverses the left-hand side matrix row by row. But for the right-hand side matrix, we want a column-major-tiled layout. To solve this problem, we decide to implement not matrix multiplication, but matrix-multiplication-by-transposed-right-hand-side which is where the t
in the linalg.mmt4d
came from. Now such an op is happy with both the left and right-hand sides being row-major-tiled.
The following example illustrates that. In these diagrams, each matrix element is rendered its memory offset.
To compute the 2x2 block in the destination matrix, we will have to load two yellow blocks from LHS, RHS matrices respectively compute their matmul results (i.e. call the kernel), then the two blue blocks, and so on. As we can see, each tile loads data that is not contiguous. It would be better if we rearranged the elements in the following layout:
Now tiles are stored contiguously in memory and the kernel can simply load them from memory into the registers that will be directly consumed by the SIMD instructions performing the multiplications. Moreover, the kernel is now loading from just two contiguous data streams, a simple memory access pattern which is sure to be efficient (regarding caches, etc) on any reasonable target hardware.
We introduce a linalg.mmt4d
operation that performs such a matrix multiplication on matrices in a tiled layout represented as 4D tensors. That leaves the question of how to represent, within the linalg dialect, the conversions between ordinary matrices represented as 2D tensors, and these tiled matrices represented as 4D tensors. Moreover, these conversions should be tileable and decompose well. Thankfully, the transformation from 2D to 4D can be written as a reshape followed by a transpose as in the following diagram:
So we can think of the outermost two dimensions of the 4D representations as the tile position in the overall matrix, and the innermost two as the element position within one tile. Hopefully the following Python pseudocode makes it more concrete:
def pack_2d_4d(operand, parallel_size, reduction_size):\n i1 = operand.shape[0] // parallel_size # M1\n i2 = parallel_size # M0\n j1 = operand.shape[1] // reduction_size # K1\n j2 = reduction_size # K0\n operand_4d = np.reshape(operand, [i1, i2, j1, j2])\n return np.transpose(operand_4d, [0, 2, 1, 3]) # [M1, K1, M0, K0]\n
Now the mmt4d operation will follow the same structure as the multi-level tiling. For simplicity, we consider here the case where no L1 tiling is required, only the first level of distribution to workgroups:
def mmt4d(A, B, C, M0, N0, K0):\n M = A.shape[0]\n N = B.shape[1]\n Bt = np.transpose(B, [1, 0])\n A4d = pack_2d_4d(A, M0, K0)\n Bt4d = pack_2d_4d(Bt, N0, K0)\n M1 = A4d.shape[0]\n N1 = Bt4d.shape[0]\n K1 = A4d.shape[1]\n for m1 in range(0, M1):\n for n1 in range(0, N1):\n for k1 in range(0, K1):\n # Tile views that are contiguous in memory.\n lhs_tile = np.reshape(A4d[m1, k1, :, :], [M0, K0])\n rhs_tile = np.reshape(Bt4d[n1, k1, :, :], [N0, K0])\n # Inner kernel.\n C[m1, n1, :, :] += np.matmul(lhs_tile, np.transpose(rhs_tile, [1, 0]))\n # 4d -> 2D\n C2d = unpack_4d_2d(C)\n return C2d\n
The resulting 4D tiled matrix still needs to be rearranged back to the original layout as a 2D tensor:
def unpack_4d_2d(operand):\n i1 = operand.shape[0] # M1\n j1 = operand.shape[1] # N1\n i2 = operand.shape[2] # M0\n j2 = operand.shape[3] # N0\n operand_transposed = operand.transpose([0, 2, 1, 3]) # [M1, M0, N1, N0]\n return operand_transposed.reshape([i1 * i2, j1 * j2]) # [M, N]\n
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#performance-results","title":"Performance Results","text":"We benchmarked various float32 matmul problems of different sizes and the result showed that mmt4d is faster than the existing matmul implementation for bigger matrices as we can see the in the following chart:
The SIMD instruction being used here is the simplest kind, a vector*scalar
multiplication, and the storage orders of the matrices allow the existing implementation to directly load the vectors from the source matrices without any rearrangement overhead. So this case is particularly friendly to the existing code, which is why the mmt4d code is only faster for bigger matrices. To understand why mmt4d is faster in that case, we collected statistics of L1 cache misses:
This shows that in this case, the better cache-friendliness of mmt4d, thanks to its simple contiguous memory access pattern, accounts for its higher performance.
As we proceed with increasingly sophisticated SIMD targets, starting with the dot-product instructions found in current mobile devices for the int8 case and going to become generalized to all data types all the way to float32 over the next few years with upcoming ARM SIMD instructions, the advantage of mmt4d will widen for all sizes, not just the larger ones.
Part of why we feel confident about the eventual performance that our approach will achieve is that, as mentioned in the introduction, we are rebuilding within the compiler an existing library's schedule and kernel, and we have benchmark results about it.
","tags":["CPU"]},{"location":"community/blog/2021-10-13-matrix-multiplication-with-mmt4d/#conclusion","title":"Conclusion","text":"We introduced a 4d tiled representation for 2d matrix-matrix multiplication with a decomposable algebric transformations that requires only reshape and transpose of input operands, we discussed and empirically showed how that solves major drawbacks in row-major linear matmul by providing a flexible way to match different ISA layout along with better cache locality achieving near peak performance.
As was mentioned in the introduction, this work is under active development and the next immediate steps are to prove the rest of the hypothesis by:
-
Handling dynamic sizes and padding to the next multiple of the target tile size.
-
Implementing the integer case (int32 += int8 * int8
).
-
Implementing the dispatch to different SIMD ISA variants at runtime.
-
Implementing cache-friendly traversal for larger matmuls and multi-threading by interfacing with IREE's runtime dispatch.
-
Improving the generated code by fusing the 4d tiled layout with the producers and consumers of the linalg.mmt4d
.
","tags":["CPU"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/","title":"TFLite support via TOSA","text":"IREE can now execute TensorFlow Lite (TFLite) models through the use of TOSA, an open standard of common tensor operations, and a part of MLIR core. TOSA\u2019s high-level representation of tensor operations provides a common front-end for ingesting models from different frameworks. In this case we ingest a TFLite FlatBuffer and compile it to TOSA IR, which IREE takes as an input format to compile to its various backends.
Using TFLite as a frontend for IREE provides an alternative ingestion method for already existing models that could benefit from IREE\u2019s design. This enables models already designed for on-device inference to have an alternative path for execution without requiring any additional porting, while benefiting from IREE\u2019s improvements in buffer management, work dispatch system, and compact binary format. With continued improvements to IREE/MLIR\u2019s compilation performance, more optimized versions can be compiled and distributed to target devices without an update to the clientside environment.
Today, we have validated floating point support for a variety of models, including mobilenet (v1, v2, and v3) and mobilebert. More work is in progress to support fully quantized models, and TFLite\u2019s hybrid quantization, along with dynamic shape support.
","tags":["TensorFlow"]},{"location":"community/blog/2021-07-19-tflite-support-via-tosa/#examples","title":"Examples","text":"TFLite with IREE is available in Python and Java. We have a colab notebook that shows how to use IREE\u2019s python bindings and TFLite compiler tools to compile a pre-trained TFLite model from a FlatBuffer and run using IREE. We also have an Android Java app that was forked from an existing TFLite demo app, swapping out the TFLite library for our own AAR. More information on IREE\u2019s TFLite frontend is available here.
","tags":["TensorFlow"]},{"location":"developers/","title":"Developers","text":"These pages cover topics useful for project maintainers and contributors.
Caution
Some of these pages may be stale. Contributions are always welcome!
"},{"location":"developers/usage-best-practices/","title":"Usage best practices","text":"This page contains a list of best practices for getting the most out of IREE, spanning model authoring, ahead-of-time compilation, and runtime use. Treat these as a collection of ideas to consider or areas to start benchmarking when working on your own applications.
"},{"location":"developers/usage-best-practices/#introduction","title":"Introduction","text":"Common themes include:
- Give the compiler as much information as possible
- Give the compiler opportunities to batch work together or defer computation
- Keep compute devices saturated with work through pipelining
- Use dense math where possible, particularly for inner loop bodies
- Limit synchronization points between devices like CPUs and GPUs
- Profile early and often, using the right tools for each level of granularity
"},{"location":"developers/usage-best-practices/#practices-for-model-authoring","title":"Practices for model authoring","text":""},{"location":"developers/usage-best-practices/#track-state-within-your-model-when-possible","title":"Track state within your model when possible","text":"If your model is stateful prefer to store that state directly within your program rather than externalizing it through arguments and return values. By keeping state inside your program the compiler is better able to reason about it and function calls will have lower overhead.
If you do externalize state, try to pack that state into a limited number of arguments.
See the variables and state sample for further guidance on tracking and using state.
"},{"location":"developers/usage-best-practices/#limit-uses-of-dynamic-shapes","title":"Limit uses of dynamic shapes","text":"While IREE aims to support general dynamic shapes use, it is better able to optimize parts of programs where shapes are static. Slow varying dimensions like batch index or timestamp are safer uses of dynamic shapes than faster varying dimensions like the x/y/channel dimensions of images.
See the dynamic shapes sample for further guidance on using dynamic shapes.
"},{"location":"developers/usage-best-practices/#practices-for-compilation-settings","title":"Practices for compilation settings","text":"TODO: which compiler targets to use (try both CUDA and Vulkan?)
TODO: use the most specific LLVM target triple you can?
"},{"location":"developers/usage-best-practices/#tuning-compilation-heuristics","title":"Tuning compilation heuristics","text":"IREE runs its own suite of benchmarks continuously using the definitions at https://github.com/openxla/iree/tree/main/benchmarks. The flags set for these benchmarks represent the latest manually tuned values for workloads we track closely and referencing them may help with your own search for peak performance. You can use these flags in your own explorations, but note that as compiler performance matures, the existing flags will gradually be replaced with attributes for autotuning or command line options for experimental features.
"},{"location":"developers/usage-best-practices/#practices-for-runtime-use","title":"Practices for runtime use","text":"TODO: sample code, profile numbers
"},{"location":"developers/usage-best-practices/#tuning-runtime-settings","title":"Tuning runtime settings","text":"When running on the CPU, the task system flags specified in iree/task/api.c give control over how worker threads will be created. For example, the --task_topology_group_count=3
flag can be set to explicitly run on three workers rather than rely on heuristic selection that defaults to one worker per detected physical core.
If running on a single thread or system with no threading support the local-sync
HAL driver can be used instead of the multithreaded local-task
HAL driver to reduce dependencies and code size. When running with the local-sync
driver all execution happens inline on the thread invoking the IREE runtime and will block until it has completed.
"},{"location":"developers/usage-best-practices/#do-the-minimum-amount-of-work-cache-queries-and-reuse-buffers","title":"Do the minimum amount of work: cache queries and reuse buffers","text":"When using IREE's runtime libraries, try to front-load queries, particularly queries using strings that look up into maps like iree_runtime_session_call_by_name
, so that hot sections of code are doing the minimum amount of work: routing inputs through buffers, scheduling runtime calls, and routing outputs through other buffers.
"},{"location":"developers/vulkan-environment-setup/","title":"Vulkan environment setup","text":"Vulkan is a new generation graphics and compute API that provides high-efficiency, cross-platform access to modern GPUs used in a wide variety of devices from PCs and consoles to mobile phones and embedded platforms.
This page lists steps and tips for setting up and troubleshooting a Vulkan development environment. The information here is meant to be generic.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-architecture","title":"Vulkan architecture","text":"Vulkan adopts a layered architecture, which aims to better support extensiblity. There are four components involved in this architecture:
- The Vulkan Application
- The Vulkan Loader
- Vulkan Layers
- Installable Client Drivers (ICDs)
The Vulkan loader sits between the Vulkan application, which calls Vulkan APIs, and the ICDs, which implement these Vulkan APIs. Vulkan layers augment the Vulkan system to provide optional features like validation and debugging. The Vulkan loader composes a chain of requested layers, which processes the Vulkan application's API calls one by one, and finally redirects the API calls made by the Vulkan application to one or more ICDs.
It's highly recommended to read the Architecture of the Vulkan Loader Interfaces Overview to get a general understanding of what these components are and how they interact with one another.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-setup_1","title":"Vulkan environment setup","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#windows","title":"Windows","text":"You need to install the Vulkan SDK from LunarG to get the Vulkan loader.
Typically the Vulkan SDK will be installed at C:\\VulkanSDK\\<version>\\
and there will be an environment variable VULKAN_SDK
pointing to it. You can run the vulkancube
executable under the Bin\\
subdirectory of the Vulkan SDK to make sure everything works properly. If not, you probably need to check whether the graphics card is Vulkan capable or update the driver.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#debianubuntu","title":"Debian/Ubuntu","text":"For Ubuntu 20.04/22.04, it's recommended to directly install the full Vulkan SDK from LunarG's APT sources for the loader and various developer tools.
If you want to have a minimal environment, the following packages should be installed for a proper Vulkan runtime:
libvulkan1
for the Vulkan loader libvulkan.so
. - For AMD GPUs, you can install
mesa-vulkan-drivers
for the Mesa AMD Vulkan ICD, or - AMD's official VUlkan ICD.
- For NVIDIA GPUs, you can install
nvidia-vulkan-icd
on Debian for NVIDIA Vulkan ICD. - the most recent
nvidia-driver-*
package on Ubuntu for NVIDIA Vulkan ICD.
The above packages provide the Vulkan loader and ICDs. With them a Vulkan application should be able to run. You may additionally want to install
- vulkan-tools for command-line tools like
vulkaninfo
(dumping available ICDs and their capabilities) and GUI application like vulkancube
(rendering a rotating cube).
In order to develop Vulkan applications, you additionally need the following packages:
- libvulkan-dev for various Vulkan header files.
- vulkan-validationlayers for Vulkan validation layers like
VkLayer_standard_validation
.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#linux","title":"Linux","text":"For other Linux distros, please consult the corresponding package management tools for the packages needed. (And please feel free to update this doc regarding them.)
You can also download and install the Vulkan SDK tarball from LunarG. It packages the loader with many useful layers and other shader tools.
You can also build the Vulkan SDK component projects like Vulkan-Loader and Vulkan-ValidationLayers from source. But note that when building these components separately, you need to make sure they are consistent with one another (e.g., using the same version of Vulkan headers) to function together.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#android","title":"Android","text":"Please make sure your Android device is Vulkan capable. Vulkan is supported on Android since 7, but we track newer Android versions (10+) closely and haven't set a clear min version yet.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#multiple-vulkan-sdks","title":"Multiple Vulkan SDKs","text":"If you have multiple versions of Vulkan loaders exist, you may also need to set LD_LIBRARY_PATH
and LD_PRELOAD
to load the desired version of the loader. For example:
LD_LIBRARY_PATH={PATH_TO_VULKAN_SDK}/x86_64/lib/\nLD_PRELOAD=libvulkan.so.1\n
This can also be done by sourcing the proper setup-env.sh
from one of the downloaded Vulkan SDKs.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-environment-troubleshooting","title":"Vulkan environment troubleshooting","text":"","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#useful-environment-variables","title":"Useful environment variables","text":"There are a few environment variables that can alter the default Vulkan loader behavior and print verbose information, notably:
VK_LOADER_DEBUG
: enable loader debug messages. Setting it to all
will enable the most verbose logging from the loader. This is especially useful when trying to see what layers/ICDs are searched and used. VK_ICD_FILENAMES
: force the loader to use a specific ICD. This is especially useful when you have multiple Vulkan capable devices and want to select which one to use manually. VK_INSTANCE_LAYERS
: force the loader to enable the given layers. For example, You can force enable VK_LAYER_LUNARG_api_dump
to have a detailed dump of all Vulkan API calls made by the application. You can force enable VK_LAYER_LUNARG_core_validation
to validate the API calls made by the application. VK_LAYER_PATH
: override the loader's standard layer library search folders.
Please see the Vulkan loader's documentation for detailed explanation for these variables.
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#setting-environment-variables-for-bazel-test","title":"Setting environment variables for Bazel test","text":"Bazel runs tests in a sandbox and environment variables must be passed through to the test runner. Consider putting environment setup in a user.bazelrc
to save typing. For example:
test --test_env=\"LD_LIBRARY_PATH=/absolute/path/to/vulkan/sdk/x86_64/lib/\"\ntest --test_env=\"LD_PRELOAD=libvulkan.so.1\"\ntest --test_env=\"VK_LAYER_PATH=/absolute/path/to/additional/layers/:$VK_LAYER_PATH\"\n
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#vulkan-function-vkcreateinstance-not-available-on-android","title":"Vulkan function vkCreateInstance
not available on Android","text":"Since Android 8 Oreo, Android re-architected the OS framework with project Treble. Framework libraries and vendor libraries have a more strict and clear separation. Their dependencies are carefully scrutinized and only selected cases are allowed. This is enforced with linker namespaces.
/data/local/tmp
is the preferred directory for automating native binary tests built using NDK toolchain. They should be allowed to access libraries like libvulkan.so
for their functionality. However, there was an issue with fully treblized Android 10 where /data/local/tmp
did not have access to the linker namespaces needed by libvulkan.so
. This should be fixed now. But as typically in the Android system, it takes a long time to see the fix getting propagated, if ever.
A known workaround is to symlink the vendor Vulkan implementation under /vendor/lib[64]
as libvulkan.so
under /data/local/tmp
and use LD_LIBRARY_PATH=/data/local/tmp
when invoking IREE executables.
For Qualcomm Adreno GPUs, the vendor Vulkan implementation is at /vendor/lib[64]/hw/vulkan.*.so
. So for example for Snapdragon 865:
adb shell ln -s /vendor/lib64/hw/vulkan.kona.so /data/local/tmp/libvulkan.so\n
For ARM Mali GPUs, there is only one monolithic driver (/vendor/lib[64]/libGLES_mali.so
) for OpenGL and Vulkan and the Vulkan vendor driver (/vendor/lib[64]/hw/vulkan.*.so
) is just a symlink to it. So for example:
adb shell ln -s /vendor/lib64/libGLES_mali.so /data/local/tmp/libvulkan.so\n
","tags":["GPU","Vulkan"]},{"location":"developers/vulkan-environment-setup/#ssh-on-linux-and-x-forwarding","title":"SSH on Linux and X forwarding","text":"Physical devices enumerated on NVIDIA drivers can be affected by the DISPLAY
environment variable. If you are running under an SSH session to Linux or using chrome remote desktop and have problems with physical device enumeration, you probably want to check the DISPLAY
environment and set it to point to a display at the server side, for example:
export DISPLAY=:0\n
","tags":["GPU","Vulkan"]},{"location":"developers/building/bazel/","title":"Building with Bazel","text":"This page walks through building IREE from source using the Bazel build system.
Warning
Bazel build support is primarily for internal project infrastructure. We strongly recommend using CMake instead.
Our Bazel configuration is also only tested on Linux. Windows and macOS may be unstable.
"},{"location":"developers/building/bazel/#prerequisites","title":"Prerequisites","text":"Linux macOS Windows -
Install Bazel, matching IREE's .bazelversion
by following the official docs.
-
Install a compiler such as Clang (GCC is not fully supported).
sudo apt install clang\n
Set environment variables for Bazel:
export CC=clang\nexport CXX=clang++\n
-
Install Python build requirements:
python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
-
Install Homebrew:
/bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)\"\n
-
Install Bazel, matching IREE's .bazelversion
by following the official docs or via Homebrew:
brew install bazel\n
-
Install Python build requirements:
python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
Tip
You can simplify installation by using a package manager like Scoop or Chocolatey.
-
Install Bazel, matching IREE's .bazelversion
by following the official docs.
Also install MSYS2 by following Bazel's documentation.
-
Install Python3 (docs here) and Python build requirements:
python -m pip install -r runtime/bindings/python/iree/runtime/build_requirements.txt\n
-
Install the full Visual Studio or \"Build Tools For Visual Studio\" from the downloads page then set the BAZEL_VS
environment variable:
> $env:BAZEL_VS = \"C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\"\n
"},{"location":"developers/building/bazel/#quickstart-clone-and-build","title":"Quickstart: clone and build","text":""},{"location":"developers/building/bazel/#clone","title":"Clone","text":"Use Git to clone the IREE repository and initialize its submodules:
git clone https://github.com/openxla/iree.git\ncd iree\ngit submodule update --init\n
Configure Bazel:
# This generates a `configured.bazelrc` file by analyzing your environment.\n# Skipping this step will make it difficult to select your platform/compiler.\npython3 configure_bazel.py\n
Linux macOS Windows (No Linux-specific tips for configuring)
(No macOS-specific tips for configuring)
Tip
Clone to a short path like C:\\projects\\
to avoid issues with Windows maximum path lengths (260 characters).
Tip
configure_bazel.py
only detects that you have Windows and will output the default --config=windows
to configured.bazelrc
, which assumes the latest version of MSVC. To avoid some warnings, you may want to replace it with (for example) --config=msvc2022
.
"},{"location":"developers/building/bazel/#build","title":"Build","text":"Run all core tests:
bazel test -k //...\n
Tip
You can add flags like --test_env=IREE_VULKAN_DISABLE=1
to your test command to change how/which tests run.
In general, build artifacts will be under the bazel-bin
directory at the top level.
"},{"location":"developers/building/bazel/#recommended-userbazelrc","title":"Recommended user.bazelrc
","text":"You can put a user.bazelrc at the root of the repository and it will be ignored by git.
Linux macOS Windows build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n
build --disk_cache=/tmp/bazel-cache\n\n# Use --config=debug to compile IREE and LLVM without optimizations\n# and with assertions enabled.\nbuild:debug --config=asserts --compilation_mode=opt '--per_file_copt=iree|llvm@-O0' --strip=never\n\n# Use --config=asserts to enable assertions. This has to be done globally:\n# Code compiled with and without assertions can't be linked together (ODR violation).\nbuild:asserts --compilation_mode=opt '--copt=-UNDEBUG'\n
build --disk_cache=c:/bazelcache\nbuild:debug --compilation_mode=dbg --copt=/O2 --per_file_copt=iree@/Od --strip=never\n
"},{"location":"developers/building/bazel/#whats-next","title":"What's next?","text":""},{"location":"developers/building/bazel/#take-a-look-around","title":"Take a Look Around","text":"Build all of IREE's 'tools' directory:
bazel build tools/...\n
Check out what was built:
ls bazel-bin/tools/\n./bazel-bin/tools/iree-compile --help\n
Translate a MLIR file and execute a function in the compiled module:
# iree-run-mlir <compiler flags> [input.mlir] <runtime flags>\n$ ./bazel-bin/tools/iree-run-mlir \\\n--iree-hal-target-backends=vmvx --print-mlir \\\n./samples/models/simple_abs.mlir \\\n--input=f32=-2\n
"},{"location":"developers/building/cmake-options/","title":"CMake options","text":""},{"location":"developers/building/cmake-options/#frequently-used-cmake-options","title":"Frequently-used CMake options","text":""},{"location":"developers/building/cmake-options/#cmake_build_type","title":"CMAKE_BUILD_TYPE
","text":" - type: STRING
Sets the build type. Possible values are Release
, Debug
, RelWithDebInfo
and MinSizeRel
. If unset, build type is set to Release
.
"},{"location":"developers/building/cmake-options/#cmake_lang_compiler","title":"CMAKE_<LANG>_COMPILER
","text":" - type: STRING
This is the command that will be used as the <LANG>
compiler, which are C
and CXX
in IREE. These variables are set to compile IREE with clang
or rather clang++
. Once set, these variables can not be changed.
"},{"location":"developers/building/cmake-options/#iree-specific-cmake-options","title":"IREE-specific CMake options","text":"This gives a brief explanation of IREE specific CMake options and variables.
"},{"location":"developers/building/cmake-options/#iree_enable_runtime_tracing","title":"IREE_ENABLE_RUNTIME_TRACING
","text":" - type: BOOL
Enables instrumented runtime tracing. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_enable_compiler_tracing","title":"IREE_ENABLE_COMPILER_TRACING
","text":" - type: BOOL
Enables instrumented compiler tracing. This requires that IREE_ENABLE_RUNTIME_TRACING
also be set. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_build_compiler","title":"IREE_BUILD_COMPILER
","text":" - type: BOOL
Builds the IREE compiler. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_tests","title":"IREE_BUILD_TESTS
","text":" - type: BOOL
Builds IREE unit tests. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_docs","title":"IREE_BUILD_DOCS
","text":" - type: BOOL
Builds IREE documentation files. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_build_samples","title":"IREE_BUILD_SAMPLES
","text":" - type: BOOL
Builds IREE sample projects. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_python_bindings","title":"IREE_BUILD_PYTHON_BINDINGS
","text":" - type: BOOL
Builds the IREE python bindings. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite","title":"IREE_BUILD_BINDINGS_TFLITE
","text":" - type: BOOL
Builds the IREE TFLite C API compatibility shim. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_bindings_tflite_java","title":"IREE_BUILD_BINDINGS_TFLITE_JAVA
","text":" - type: BOOL
Builds the IREE TFLite Java bindings with the C API compatibility shim. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_build_experimental_remoting","title":"IREE_BUILD_EXPERIMENTAL_REMOTING
","text":" - type: BOOL
Builds experimental remoting component. Defaults to OFF
.
"},{"location":"developers/building/cmake-options/#iree_hal_driver_defaults","title":"IREE_HAL_DRIVER_DEFAULTS
","text":" - type: BOOL
Default setting for each IREE_HAL_DRIVER_*
option.
"},{"location":"developers/building/cmake-options/#iree_hal_driver_","title":"IREE_HAL_DRIVER_*
","text":" - type: BOOL
Individual options enabling the build for each runtime HAL driver.
"},{"location":"developers/building/cmake-options/#iree_target_backend_defaults","title":"IREE_TARGET_BACKEND_DEFAULTS
","text":" - type: BOOL
Default setting for each IREE_TARGET_BACKEND_*
option.
"},{"location":"developers/building/cmake-options/#iree_target_backend_","title":"IREE_TARGET_BACKEND_*
","text":" - type: BOOL
Individual options enabling the build for each compiler target backend.
"},{"location":"developers/building/cmake-options/#iree_input_","title":"IREE_INPUT_*
","text":" - type: BOOL
Individual options enabling each set of input dialects.
"},{"location":"developers/building/cmake-options/#iree_output_format_c","title":"IREE_OUTPUT_FORMAT_C
","text":" - type: BOOL
Enables the vm-c compiler output format, using MLIR EmitC. Defaults to ON
.
"},{"location":"developers/building/cmake-options/#iree_dev_mode","title":"IREE_DEV_MODE
","text":" - type: BOOL
Configure settings to optimize for IREE development (as opposed to CI or release). Defaults to OFF
. For example, this will downgrade some compiler diagnostics from errors to warnings.
"},{"location":"developers/building/cmake-options/#iree_enable_lld","title":"IREE_ENABLE_LLD
","text":" - type: BOOL
Use lld when linking. Defaults to OFF
. This option is equivalent to -DIREE_USE_LINKER=lld
. The option IREE_ENABLE_LLD
and IREE_USE_LINKER
can not be set at the same time.
"},{"location":"developers/building/cmake-options/#iree_enable_asan","title":"IREE_ENABLE_ASAN
","text":" - type: BOOL
Enable address sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#iree_enable_msan","title":"IREE_ENABLE_MSAN
","text":" - type: BOOL
Enable memory sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#iree_enable_tsan","title":"IREE_ENABLE_TSAN
","text":" - type: BOOL
Enable thread sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#iree_enable_ubsan","title":"IREE_ENABLE_UBSAN
","text":" - type: BOOL
Enable undefined behavior sanitizer if the current build type is Debug and the compiler supports it.
"},{"location":"developers/building/cmake-options/#cross-compilation","title":"Cross-compilation","text":"When cross compiling (using a toolchain file like android.toolchain.cmake
), first build and install IREE's tools for your host configuration, then use the IREE_HOST_BIN_DIR
CMake option to point the cross compiled build at the host tools.
"},{"location":"developers/building/cmake-with-ccache/","title":"CMake with ccache
","text":"ccache
is a compilation cache. In principle, just prepending compiler invocations with ccache
is all one needs to enable it, e.g.
ccache clang foo.c -c -o foo.o\n
takes care of executing clang
with these arguments and caches the output file foo.o
. The next invocation then skips executing clang
altogether.
When the cache is hit, the speedup is such that the \"compilation\" becomes essentially free. However, ccache
only caches compilation, not linking.
Here a few scenarios where ccache
helps:
- Incremental rebuilds. While
cmake
always tries to avoid unnecessary work in incremental rebuilds, it can only make simple decisions based on file timestamps. ccache
sees deeper: if the raw source code isn't readily a cache hit, it will then try again after preprocessing and discarding comments. - One pain point with
cmake
is having to start over from a clean build directory from time to time, which by default means paying again the full cost of a cold build. Thankfully ccache
keeps its cache outside of any cmake
build directory, so the first build in the new clean build directory may be very fast.
"},{"location":"developers/building/cmake-with-ccache/#installing-and-setting-up-ccache","title":"Installing and setting up ccache
","text":"ccache
is available on most platforms. On Debian-based Linux distributions, do:
sudo apt install ccache\n
The one ccache
setting that you probably need to configure is the maximum cache size. The default 5G
is too small for our purposes. To set the cache max size, do this once:
ccache --max-size=20G\n
Tip: At the moment (late 2020), most of the code we're building is third_party/llvm-project
so the fundamental limiting factor to how far we can cache away rebuilds is how often that dependency gets updated. Given how frequently it currently is updated, I'm finding that 20G
is enough to make the ccache
size not be the limiting factor.
"},{"location":"developers/building/cmake-with-ccache/#telling-cmake-to-use-ccache","title":"Telling CMake to use ccache
","text":"Use the CMake COMPILER_LAUNCHER functionality by setting CMAKE_C_COMPILER_LAUNCHER=ccache
and CMAKE_CXX_COMPILER_LAUNCHER=ccache
in your
Notes:
- This approach only works with the
Ninja
and Makefile
generators (cmake -G
flag). When using other generators, another approach is needed, based on wrapping the compiler in a script that prepends ccache
. See this article.
"},{"location":"developers/building/cmake-with-ccache/#ensuring-that-ccache-is-used-and-monitoring-cache-hits","title":"Ensuring that ccache
is used and monitoring cache hits","text":"The ccache -s
command dumps statistics, including a cache hit count and ratio. It's convenient to run periodically with watch
in a separate terminal:
watch -n 0.1 ccache -s # update the stats readout every 0.1 seconds\n
"},{"location":"developers/building/emscripten/","title":"Building with Emscripten","text":"Emscripten is a complete compiler toolchain to WebAssembly, using LLVM, with a special focus on speed, size, and the Web platform. Emscripten can be used to compile parts of IREE to WebAssembly for execution within web browsers or other Wasm runtimes.
","tags":["Web"]},{"location":"developers/building/emscripten/#status","title":"Status","text":"IREE's runtime can be compiled through Emscripten in some limited configurations. More of the runtime will be supported over time.
IREE's compiler can be compiled through Emscripten with local changes. More work is needed for this to be generally supported.
","tags":["Web"]},{"location":"developers/building/emscripten/#prerequisites","title":"Prerequisites","text":"Read https://emscripten.org/docs/getting_started/downloads.html and run
./emsdk install latest\n./emsdk activate latest\nsource ./emsdk_env.sh\n
","tags":["Web"]},{"location":"developers/building/emscripten/#building-irees-runtime-with-emscripten","title":"Building IREE's runtime with Emscripten","text":"","tags":["Web"]},{"location":"developers/building/emscripten/#host-configuration","title":"Host configuration","text":"Build and install at least the compiler tools on your host machine, or install them from a binary distribution:
$ cmake -G Ninja -B ../iree-build-host/ \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DCMAKE_INSTALL_PREFIX=../iree-build-host/install \\\n.\n$ cmake --build ../iree-build-host/ --target install\n
","tags":["Web"]},{"location":"developers/building/emscripten/#target-configuration","title":"Target configuration","text":"$ emcmake cmake -G Ninja -B ../iree-build-emscripten/ \\\n-DCMAKE_BUILD_TYPE=Release \\\n-DIREE_HOST_BIN_DIR=$(realpath ../iree-build-host/install/bin) \\\n-DIREE_BUILD_TESTS=OFF \\\n-DIREE_BUILD_COMPILER=OFF \\\n.\n
Build:
cmake --build ../iree-build-emscripten/ \\\n--target iree_samples_simple_embedding_simple_embedding_vmvx_sync\n
","tags":["Web"]},{"location":"developers/building/emscripten/#load-into-a-webassembly-environment","title":"Load into a WebAssembly environment","text":"Copy the outputs from the build process (e.g. simple_embedding_vmvx_sync.js
and simple_embedding_vmvx_sync.wasm
) into your application and follow instructions at either https://webassembly.org/getting-started/developers-guide/ or https://developer.mozilla.org/en-US/docs/WebAssembly/Loading_and_running.
","tags":["Web"]},{"location":"developers/debugging/android-with-lldb/","title":"Android LLDB debugging","text":"This doc shows how to use LLDB to debug native binaries on Android. For a more complete explanation, see the official LLDB documentation on remote debugging.
","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#prerequisites","title":"Prerequisites","text":"We assume the following setup:
- Android NDK is installed and the
ANDROID_NDK
environment variable is set to the installation path. - Your Android device connected and configured for
adb
. - The Android binary of interest is already compiled and the command to run it (in
adb shell
) is <your-binary> [program args...]
. This does not have to be a proper Android app with a manifest, etc.
","tags":["Android"]},{"location":"developers/debugging/android-with-lldb/#running-manually","title":"Running Manually","text":" -
Push the toolchain files, including lldb-server
, to your device:
adb shell \"mkdir -p /data/local/tmp/tools\"\nadb push \"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/aarch64/* /data/local/tmp/tools\n
You may need to adjust the clang toolchain version to match the one in your NDK. You can find it with find \"$ANDROID_NDK/toolchains/llvm/prebuilt\" -name lldb-server
.
-
Set up port forwarding. We are going to use port 5039 but you are free to pick a different one:
adb forward tcp:5039 tcp:5039\n
-
Start an lldb-server
in a new interactive adb shell:
adb shell\n/data/local/tmp/tools/lldb-server platform --listen '*:5039' --server\n
-
Launch lldb
, connect to the server and run the binary:
lldb -o 'platform select remote-android' \\\n-o 'platform connect connect://:5039' \\\n-o 'platform shell cd /data/local/tmp'\ntarget create <your-binary>\nrun [program args...]\n
You can either use the system lldb
or a prebuilt under \"$ANDROID_NDK\"/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.6/lib/linux/<your-host-arch>
.
Explanation: each -o
(short for --one-shot
) tells lldb to execute a command on startup. You can run those manually in the lldb shell, if you prefer. Then, we tell lldb which working directory to use, where to find the executable, and what command line arguments to use.
","tags":["Android"]},{"location":"developers/debugging/compile-time-regressions/","title":"Compile time regression debugging","text":"So the IREE compiler used to compile a program quickly, but it is now slower. What do you do?
"},{"location":"developers/debugging/compile-time-regressions/#initial-information-gathering","title":"Initial information gathering","text":"Try to answer as many of these questions as you can:
When did compilation get slower?
A specific git commit is ideal, but \"sometime in the last week\" is a good starting point. You'll ultimately want to find a culprit release or git commit that changed the compiler code.
How much slower did compilation get?
Be specific - did it jump from 1 minute to 2 minutes, or 1 minute to 1 hour? Identifying the scale of the regression can help set the priority to investigate it.
What is the full compile command?
Try to extract the input program and full list of flags passed to the compiler binary so that others can reproduce what you're seeing. Try to distill this as much as possible to using just native tools (no Python or other framework layers).
What environment is the compiler running in?
Are you using a Debug
build, or a release build? What operating system and size machine is running the compiler (e.g. Linux developer machine, or a smaller system)?
"},{"location":"developers/debugging/compile-time-regressions/#culprit-finding-and-bisecting","title":"Culprit finding and bisecting","text":"If you only have a rough idea of when something changed and want to narrow that down to a specific code change, bisecting can help.
"},{"location":"developers/debugging/compile-time-regressions/#running-git-bisect","title":"Running git bisect
","text":"Building the compiler from source and using git bisect
will let you pinpoint specific commits in IREE, though it typically won't let you step through changes in submodules (e.g. MLIR updates in third_party/llvm-project/
).
Tip: Configure ccache if you'll be rebuilding the compiler while bisecting
A manual workflow with git bisect
looks like this:
git bisect start --first-parent\ngit bisect good [<rev>]\ngit bisect bad [<rev>]\n\n# Read the prompts from the command as it runs\n# At each step, test the compiler:\n# git submodule update\n# cmake --build build/ --target iree-compile\n# ./build/tools/iree-compile <args>\n# attach Tracy, observe timing, print IR, etc. to determine if fast or slow\n# if fast, `git bisect good`\n# if slow, `git bisect bad`\n# repeat\n
An automated workflow can use git bisect run
and a script:
# run_bisect.sh\ngit submodule update\ncmake --build build/ --target iree-compile\n# Other logic here\n
git bisect start --first-parent\ngit bisect good [<rev>]\ngit bisect bad [<rev>]\ngit bisect run run_bisect.sh\n
"},{"location":"developers/debugging/compile-time-regressions/#sample-compile-executable-sources-individually-with-a-timeout","title":"Sample: compile executable sources individually with a timeout","text":"#!/bin/bash\n\nset -xeuo pipefail\n\n# --------------------------------------------------------------------------- #\n# Settings #\n# --------------------------------------------------------------------------- #\n\nINPUT_FILE_PATH=\"/path/to/program.mlirbc\"\nTMP_DIR=\"../iree-tmp\"\n\ndeclare -a COMPILER_FLAGS=(\n\"--iree-input-type=stablehlo\"\n\"--iree-hal-target-backends=cuda\"\n\"--iree-hal-cuda-llvm-target-arch=sm_80\"\n)\n\nTIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE=10\n\n# --------------------------------------------------------------------------- #\n# Utility functions #\n# --------------------------------------------------------------------------- #\n\n# Call to have `git bisect` skip this commit (don't mark as good _or_ bad)\n# https://git-scm.com/docs/git-bisect#_bisect_run\nskip_on_error() {\n>&2 echo \"** Skipping due to error: $1 **\"\nexit 125 # Special exit code for `git bisect skip`\n}\n\n# --------------------------------------------------------------------------- #\n# Main script #\n# --------------------------------------------------------------------------- #\n\n# Store git version hash, so we can dump artifacts to unique directories later.\nGIT_SHA=\"$(git rev-parse --short HEAD)\"\n\necho \"** Building iree-compile at ${GIT_SHA} **\"\n\n# The `git bisect` command only checks out a commit, so update submodules.\ngit submodule update\n\n# Build the compiler. 
You'll want ccache configured to make this fast!\ncmake --build ../iree-build/ --target iree-compile || skip_on_error \"CMake build failed\"\n\n# Run the compiler, dumping executable sources and stopping.\nSOURCES_DIR=\"${TMP_DIR}/sources-${GIT_SHA}\"\necho \"** Running iree-compile at ${GIT_SHA}, dumping sources to ${SOURCES_DIR} **\"\n../iree-build/tools/iree-compile \\\n${INPUT_FILE_PATH} \\\n${COMPILER_FLAGS[@]} \\\n--iree-hal-dump-executable-sources-to=${SOURCES_DIR} \\\n--compile-to=executable-sources \\\n-o /dev/null\n\n# Run the compiler again on each executable individually.\necho \"** Running iree-compile at ${GIT_SHA} for each executable source **\"\nSOURCES=($(ls -1 ${SOURCES_DIR}))\nfor SOURCE in \"${SOURCES[@]}\"; do\necho \" * Compiling: ${SOURCE} *\"\ntimeout --verbose ${TIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE} \\\n../iree-build/tools/iree-compile ${SOURCES_DIR}/${SOURCE} \\\n${COMPILER_FLAGS[@]} \\\n--compile-mode=hal-executable \\\n-o /dev/null\ndone\n
"},{"location":"developers/debugging/compile-time-regressions/#profiling-and-tracing","title":"Profiling and tracing","text":"If you want to understand why the compiler is fast or slow, or if you want to compare performance in detail between two versions, consider these profiling options.
"},{"location":"developers/debugging/compile-time-regressions/#mlir-pass-timing","title":"MLIR pass timing","text":"The -mlir-timing
flag enables Pass Timing instrumentation. Once the compiler finishes running, this prints a report like
===-------------------------------------------------------------------------===\n... Pass execution timing report ...\n===-------------------------------------------------------------------------===\nTotal Execution Time: 0.0203 seconds\n\n ---Wall Time--- --- Name ---\n 0.0047 ( 55.9%) Canonicalizer\n 0.0019 ( 22.2%) VerifierPass\n 0.0016 ( 18.5%) LLVMLoweringPass\n 0.0003 ( 3.4%) CSE\n 0.0002 ( 1.9%) (A) DominanceInfo\n 0.0084 (100.0%) Total\n
This is easy data to collect, especially remotely over SSH, but it might not paint a complete picture and requires waiting for compilation to finish.
"},{"location":"developers/debugging/compile-time-regressions/#using-tracy","title":"Using Tracy","text":"See our documentation on profiling with Tracy. For compile time regressions, pay particular attention to the different compilation phases (Flow/Stream/HAL), how many times TranslateExecutablesPass
runs, and if there are outlier passes that take significantly longer to run than others.
Here are some previous analyses for inspiration:
- https://github.com/openxla/iree/issues/12033
- https://github.com/openxla/iree/issues/12035
- https://github.com/openxla/iree/issues/12183
- https://github.com/openxla/iree/issues/13189
Example slow trace:
Example fast trace:
Example sampling statistics showing 10s of minutes in LLVM codegen:
"},{"location":"developers/debugging/compile-time-regressions/#stepping-through-compiler-ir","title":"Stepping through compiler IR","text":"Debugging an MLIR-based compiler like IREE usually involves reading IR at some point. For compile time regressions, it helps to snapshot the IR at a few key phases and look for differences between fast compilation and slow compilation.
Here is one useful flag combination:
--mlir-disable-threading \\\n--mlir-elide-elementsattrs-if-larger=8 \\\n--mlir-print-ir-after=iree-hal-materialize-interfaces\n
"},{"location":"developers/debugging/integration-tests/","title":"Integration test debugging","text":"This document includes tips for triaging integration test correctness issues. Feel free to reach out to @hanhanW or ask questions on Discord for more help.
"},{"location":"developers/debugging/integration-tests/#general-tips","title":"General tips","text":""},{"location":"developers/debugging/integration-tests/#narrow-down-reproducers","title":"Narrow down reproducers","text":" - Models themselves can be large, and IREE breaks models into dispatches/kernels and then launches those individually. Program outputs could diverge starting from any individual launch. To get a smaller reproducer, you can use --iree-flow-trace-dispatch-tensors.
- You can compare the logs between builds/backends to get an idea about which dispatch results in wrong outputs. The dumped inputs can be reused in a flagfile.
Once a suspicious dispatch is identified, we can create a test case based on the dispatch function. The dispatch function can be derived after the OutlineDispatchRegions
pass. The function signatures have to be modified manually. You'll have to put flow.dispatch.tensor.load
variables to function arguments, and replace flow.dispatch.tensor.store
with return
op.
Note: This only works when dispatch formation logics are identical between runs.
"},{"location":"developers/debugging/integration-tests/#iree-samples-repository-tests","title":"iree-samples repository tests","text":"Follow README to run the model. The MLIR files will be generated. You'll find the saved file from log. E.g.,
[ RUN ] MobilenetV2Int8Test.test_compile_tflite\nI0401 17:27:04.084272 140182373025024 test_util.py:119] Setting up for IREE\nI0401 17:27:04.085064 140182373025024 binaries.py:218] Invoke IREE Pipeline:\n /tmp/iree-samples/iree-samples.venv/lib/python3.9/site-packages/iree/tools/tflite/iree-import-tflite\n /tmp/iree-samples/tflitehub/tmp/mobilenet_v2_int8_test.py/model.tflite\n --mlir-print-debuginfo\n --save-temp-tfl-input=/tmp/iree-samples/tflitehub/tmp/mobilenet_v2_int8_test.py/tflite.mlir\n --save-temp-iree-input=/tmp/iree-samples/tflitehub/tmp/mobilenet_v2_int8_test.py/tosa.mlir\n
Unfortunately, the artifacts are not dumped in the runs. There is an issue for tracking this. A workaround can be found in the issue.
"},{"location":"developers/debugging/integration-tests/#tensorflow-integration-tests","title":"TensorFlow integration tests","text":"These are steps to reproduce/address failures in TF/TFLite integration tests. These instructions are most stable on Linux, though they may work with a few tweaks on Windows and macOS.
All steps here assume starting from the IREE root directory.
-
First create a Python virtual environment to install packages into:
python -m venv iree-tf.venv\nsource iree-tf.venv/bin/activate\n\n# Install test requirements\npython -m pip install -r ./integrations/tensorflow/test/requirements.txt\n
-
Install IREE's tools and Python bindings or build them from source
Install distributed packages
# Install packages from nightly releases\n# This should work for most cases, as the importers change infrequently\npython -m pip install \\\niree-compiler iree-runtime iree-tools-tf iree-tools-tflite \\\n--find-links https://iree.dev/pip-release-links.html\n
OR build from source
# Build Python bindings from source\ncmake -G Ninja -B ../iree-build/ -DIREE_BUILD_PYTHON_BINDINGS=ON .\ncmake --build ../iree-build/\n\n# Add IREE built-from-source Python packages to PYTHONPATH\nsource .env\n\n# Install IREE TF/TFLite Python packages\npython -m pip install integrations/tensorflow/python_projects/iree_tf\npython -m pip install integrations/tensorflow/python_projects/iree_tflite\n
-
Run the python test command line
The command can be obtained from the run file. For example, if iree_tfl_tests/llvmcpu_posenet_i8.run
failed,
cd integrations/tensorflow/test/\ncat iree_tfl_tests/llvmcpu_posenet_i8.run\n\n# REQUIRES: llvmcpu\n# RUN: %PYTHON -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=%t\n\ncd python/\npython -m iree_tfl_tests.posenet_i8_test --target_backend=llvmcpu --artifacts_dir=/tmp/posenet_i8_failure\n
Note that the command can only be run under integrations/tensorflow/test/python
directory.
-
Extract intermediate files and use with native tools
The test will create an iree_input.mlir
in the temp directory specified. Those can then be fed into iree-compile
(built locally to reproduce the error)
iree-compile \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-input-type=stablehlo \\\niree_input.mlir\n
"},{"location":"developers/debugging/releases/","title":"Release debugging playbook","text":""},{"location":"developers/debugging/releases/#tools-and-locations","title":"Tools and Locations","text":" .github/workflows/build_package.yml
: Release packaging jobs build_tools/github_actions/build_dist.py
: Main script to build various release packages (for all platforms). We usually use this when reproing to approximate exactly what the CI does. Assumes a subdirectory of c
and writes builds to iree-build
and iree-install
as a peer of it. To use locally, just symlink your source dir as c
in an empty directory (versus checking out).
"},{"location":"developers/debugging/releases/#mapping-releases-back-to-git-commits","title":"Mapping releases back to git commits","text":"The source IREE commit SHA is embedded into pip releases in a few places. Starting in a python venv, you can find the IREE commit from both the shell:
\"$(find . -name 'iree-compile' -executable)\" --version\nIREE (https://iree.dev):\n IREE compiler version 20231016.553 @ f1cb2692a086738d7f16274b9b3af6d2c15ef133\n LLVM version 18.0.0git\n Optimized build\n
and the Python API:
python -c \"import iree.compiler.version as v; print(v.REVISIONS['IREE'])\"\nf1cb2692a086738d7f16274b9b3af6d2c15ef133\n
"},{"location":"developers/debugging/releases/#manylinux-releases","title":"Manylinux releases","text":"The Linux releases are done in a manylinux2014 docker container. At the time of this writing, it has gcc 9.3.1 and Python versions 3.5 - 3.9 under /opt/python
. Note that this docker image approximates a 2014 era RHEL distro, patched with backported (newer) dev packages. It builds with gcc and BFD linker unless you arrange otherwise. yum
can be used to get some packages.
Get a docker shell (see exact docker image in build_package.yml workflow):
docker run --rm -it -v $(pwd):/work/c stellaraccident/manylinux2014_x86_64-bazel-4.2.2:latest /bin/bash\n
Remember that docker runs as root unless if you take steps otherwise. Don't touch write files in the /work/c
directory to avoid scattering root owned files on your workstation.
The default system Python is 2.x, so you must select one of the more modern ones:
export PATH=/opt/python/cp39-cp39/bin:$PATH\n
Build core installation:
# (from within docker)\ncd /work\npython ./c/build_tools/github_actions/build_dist.py main-dist\n\n# Also supports:\n# main-dist\n# py-runtime-pkg\n# py-xla-compiler-tools-pkg\n# py-tflite-compiler-tools-pkg\n# py-tf-compiler-tools-pkg\n
You can git bisect
on the host and keep running the above in the docker container. Note that every time you run build_dist.py
, it deletes the cmake cache but otherwise leaves the build directory (so it pays the configure cost but is otherwise incremental). You can just cd iree-build
and run ninja
for faster iteration (after the first build or if changing cmake flags). Example:
Extended debugging in the manylinux container:
cd /work/iree-build\n# If doing extended debugging in the container, these may make you happier.\nyum install ccache devtoolset-9-libasan-devel gdb\n\n# Get an LLVM symbolizer.\nyum install llvm9.0\nln -s /usr/bin/llvm-symbolizer-9.0 /usr/bin/llvm-symbolizer\n\n# You can manipulate cmake flags. These may get you a better debug experience.\ncmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DIREE_ENABLE_ASAN=ON -DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=gold -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .\n\nninja\n\n# Or you may need this if buggy LLVM tools (like mlir-tblgen) are leaking :(\nASAN_OPTIONS=\"detect_leaks=0\" ninja\n
Other tips:
- If debugging the runtime, you may have a better time just building the Release mode
main-dist
package above once, which will drop binaries in the iree-install
directory. Then build the py-runtime-pkg
or equiv and iterate further in the build directory. Ditto for TF/XLA/etc.
"},{"location":"developers/debugging/releases/#testing-releases-on-your-fork","title":"Testing releases on your fork","text":"To avoid interrupting the regular releases published on the IREE github, you can test any changes to the release process on your own fork. Some setup is required before these github actions will work on your fork and development branch.
You can run schedule_candidate_release.yml
with a workflow dispatch from the actions tab. If you want to test using a commit other than the latest green on your main
branch, modify the section that identifies the latest green commit to search from another commit or just hardcode one.
To speed up build_package.yml
, you may want to comment out some of the builds here. The py-pure-pkgs
build takes only ~2 minutes and the py-runtime-pkg
build takes ~5, while the others can take several hours.
From your development branch, you can manually run the Schedule Snapshot Release action, which invokes the Build Release Packages action, which finally invokes the Validate and Publish Release action. If you already have a draft release and know the release id, package version, and run ID from a previous Build Release Packages run, you can also manually run just the Validate and Publish Release action.
"},{"location":"developers/debugging/sanitizers/","title":"Sanitizers (ASan/MSan/TSan)","text":"AddressSanitizer, MemorySanitizer and ThreadSanitizer are tools provided by clang
to detect certain classes of errors in C/C++ programs. They consist of compiler instrumentation (so your program's executable code is modified) and runtime libraries (so e.g. the malloc
function may get replaced).
They are abbreviated as \"ASan\", \"MSan\" and \"TSan\" respectively.
They all incur large overhead, so only enable them while debugging.
Tool Detects Helps debug what? Slowdown Memory overhead Android support ASan Out-of-bounds accesses, use-after-free, use-after-return, memory leaks Crashes, non-deterministic results, memory leaks 2x 3x Yes MSan Uninitialized memory reads Non-deterministic results 3x ? Yes TSan Data races Many bugs in multi-thread code 5x-15x 5x-10x No Note
See this documentation on leak detection. It is only enabled by default on some platforms.
"},{"location":"developers/debugging/sanitizers/#support-status-and-how-to-enable-each-sanitizer","title":"Support status and how to enable each sanitizer","text":""},{"location":"developers/debugging/sanitizers/#asan-addresssanitizer","title":"ASan (AddressSanitizer)","text":"Enabling ASan in the IREE build is a simple matter of setting the IREE_ENABLE_ASAN
CMake option:
cmake -DIREE_ENABLE_ASAN=ON ...\n
"},{"location":"developers/debugging/sanitizers/#tsan-threadsanitizer","title":"TSan (ThreadSanitizer)","text":"To enable TSan, at the moment, the following 3 CMake options must be set:
cmake \\\n-DIREE_ENABLE_TSAN=ON \\\n-DIREE_BYTECODE_MODULE_ENABLE_TSAN=ON \\\n-DIREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER=ON \\\n-DIREE_BUILD_SAMPLES=OFF \\\n...\n
In practice, IREE_ENABLE_TSAN
alone would be enough for many targets, but not all. The problem is that a IREE runtime built with IREE_ENABLE_TSAN
cannot load a IREE compiled LLVM/CPU module unless the following flags were passed to the IREE compiler: --iree-llvmcpu-sanitize=thread
and --iree-llvmcpu-link-embedded=false
.
The CMake options IREE_BYTECODE_MODULE_ENABLE_TSAN
and IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER
ensure that the above flags are passed to the IREE compiler when building modules used in tests, benchmarks, etc. (anything that internally uses the CMake iree_bytecode_module
macro).
The CMake option IREE_BUILD_SAMPLES=OFF
is needed because samples currently assume that the embedded linker is used, so they are incompatible with IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER=ON
.
At the moment, CMake logic heavy-handedly enforces that whenever IREE_ENABLE_TSAN
is set, these other two CMake variables are also set. That ensures that all tests succeed: no test is expected to fail with TSan.
If you know what you're doing (i.e. if you are not building targets that internally involve a LLVM/CPU iree_bytecode_module
), feel free to locally comment out the CMake error and only set IREE_ENABLE_TSAN
. Also see a past attempt to relax that CMake validation.
"},{"location":"developers/debugging/sanitizers/#msan-memorysanitizer","title":"MSan (MemorySanitizer)","text":"In theory that should be a simple matter of
-DIREE_ENABLE_MSAN=ON\n
However, that requires making and using a custom build of libc++ with MSan as explained in this documentation.
As of April 2022, all of IREE's tests succeeded with MSan on Linux/x86-64, provided that the vulkan
driver was disabled (due to lack of MSan instrumentation in the NVIDIA Vulkan driver).
"},{"location":"developers/debugging/sanitizers/#ubsan-undefinedbehaviorsanitizer","title":"UBSan (UndefinedBehaviorSanitizer)","text":"Enabling UBSan in the IREE build is a simple matter of setting the IREE_ENABLE_UBSAN
CMake option:
cmake -DIREE_ENABLE_UBSAN=ON ...\n
Note that both ASan and UBSan can be enabled in the same build.
"},{"location":"developers/debugging/sanitizers/#symbolizing-the-reports","title":"Symbolizing the reports","text":""},{"location":"developers/debugging/sanitizers/#desktop-platforms","title":"Desktop platforms","text":"On desktop platforms, getting nicely symbolized reports is covered in this documentation. The gist of it is make sure that llvm-symbolizer
is in your PATH
, or make the ASAN_SYMBOLIZER_PATH
environment variable point to it.
"},{"location":"developers/debugging/sanitizers/#android","title":"Android","text":"On Android it's more complicated due to this Android NDK issue. Fortunately, we have a script to perform the symbolization. Copy the raw output from the sanitizer and feed it into the stdin
of the build_tools/scripts/android_symbolize.sh
script, with the ANDROID_NDK
environment variable pointing to the NDK root directory, like this:
ANDROID_NDK=~/android-ndk-r21d ./build_tools/scripts/android_symbolize.sh < /tmp/asan.txt\n
Where /tmp/asan.txt
is where you've pasted the raw sanitizer report.
Tip
This script will happily just echo any line that isn't a stack frame. That means you can feed it the whole ASan
report at once, and it will output a symbolized version of it. DO NOT run it on a single stack at a time! That is unlike the symbolizer tool that's being added in NDK r22, and one of the reasons why we prefer to keep our own script. For more details see this comment.
"},{"location":"developers/design-docs/cuda-backend/","title":"CUDA backend design","text":"Authored March, 2021
This document is intended to provide an overview of the design choices made to support CUDA within IREE. It describes both the HAL runtime and the NVVM codegen side.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#cuda-hal-driver","title":"CUDA HAL Driver","text":"The CUDA HAL driver is in iree/hal/drivers/cuda/
directory. It is written in C following the standards of the rest of the HAL module.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#cuda-library-dependency","title":"CUDA library dependency","text":"IREE calls directly into CUDA driver API
. CUDA library is loaded dynamically and cuda.h header from CUDA SDK is part of IREE third_party project. Therefore IREE doesn't require CUDA SDK to be installed when building iree tools.
At runtime HAL CUDA driver will load libcuda.so/nvcuda.dll library and load a subset of the cuda driver API used in HAL. The list of functions being used are in the file iree/hal/drivers/cuda/dynamic_symbols_tables.h
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#driver","title":"Driver","text":"There is no direct equivalent in CUDA to the HAL driver abstraction. We use it to hold the symbols loaded for all the devices.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#device","title":"Device","text":"The equivalent to HAL device in CUDA is the CUcontext
, it holds all the state related to memory allocations.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#command-buffer","title":"Command buffer","text":"We implement command buffers using CUDA Graph API
. Using the Graph API allows to easily encode fine grain dependencies between dispatch without having to create multiple streams.
Note that Graph API is meant to be used for command buffers that can be recorded once and used several times and there may be a performance penalty to using Graph API for direct command buffer. It is likely that we will also have a pure stream implementation in the future if we see performance problems with direct command buffer usages.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#event-and-barrier","title":"Event and Barrier","text":"In HAL Event and Barrier are used for GPU<->GPU synchronization either within a command buffer (Event and Barrier) or between command buffers.
The current implementation ignores events and barriers and serializes all the nodes of the graph in order to have a conservative but correct solution.
The design we plan for the future is to map dependencies within a command buffer to graph dependencies in the CUDA Graph API. When an event is signaled all the leaf nodes of the graph will be saved in HAL data structure and when the same command buffer waits on the signal we will add all the nodes as dependency to the future nodes added to the graph.
For simplicity we always serialize command buffers sent to the same command queue.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#allocator","title":"Allocator","text":"The allocator will forward allocation requests to cuMemHostAlloc
for host accessible memory and cuMemAlloc
for device only memory.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#buffer","title":"Buffer","text":"CUDA buffers are represented either as a host pointer or a device pointer of type CUdeviceptr
.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#executable","title":"Executable","text":"HAL executable maps naturally to a PTX module. The compiler will generate a flat buffer containing a PTX text module as well as a list of entry point function names and the workgroup size associated with those entry points.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#semaphore","title":"Semaphore","text":"Timeline semaphore is used in IREE to handle coarse grain synchronization for CPU<->GPU, GPU<->GPU and CPU<->CPU. The interface follows closely Vulkan timeline semaphore spec
.
There is currently no simple way to implement this on CUDA. There are several solutions discussed on this IREE issue
but no obvious solution. For now we force CPU and GPU to be synchronized after every submit to ensure correctness and ignore the semaphore.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#nvvm-codegen","title":"NVVM Codegen","text":"","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#nvvm-and-ptx","title":"NVVM and PTX","text":"NVVM is a CUDA specific IR composed of LLVM IR and NVVM specific intrinsics. It can be compiled to PTX text using LLVM PTX backend. NVVM has an associated dialect in MLIR that translates 1:1 to NVVM intrinsics. This is what we are using to generate the PTX kernel code.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#iree-flow","title":"IREE flow","text":"IREE's target independent codegen converts the compiler input to Linalg on Tensors. Afterward IREE will call the LinalgToLLVMGPU codegen passes.
Once we get into LinalgToLLVMGPU passes we first do bufferize to generate Linalg on Buffers. Then we apply MLIR generic passes to convert linalg to SCF dialect and then SCF to Standard dialect. After that we convert Standard dialect to LLVM+NVVM dialect.
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/cuda-backend/#example","title":"Example","text":"Save the following mlir in /tmp/add.mlir
func.func @add(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {\n %0 = tensor.empty() : tensor<4xf32>\n %1 = linalg.generic {\n indexing_maps = [\n affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = [\"parallel\"]}\n ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>)\n outs(%0 : tensor<4xf32>) {\n ^bb0(%in: f32, %in_0: f32, %out: f32):\n %2 = arith.addf %in, %in_0 : f32\n linalg.yield %2 : f32\n } -> tensor<4xf32>\n return %1 : tensor<4xf32>\n}\n
# First compile into a VM bytecode module.\n$ ../iree-build/tools/iree-compile \\\n--iree-hal-target-backends=cuda \\\n/tmp/add.mlir \\\n-o /tmp/add.vmfb\n\n# Run the module through CUDA HAL backend.\n$ ../iree-build/tools/iree-run-module \\\n--device=cuda \\\n--module=/tmp/add.vmfb \\\n--function=add \\\n--input=\"4xf32=[1 2 3 4]\" \\\n--input=\"4xf32=[2 2 2 2]\"\n\nEXEC @add\n4xf32=3 4 5 6\n
","tags":["GPU","CUDA"]},{"location":"developers/design-docs/design-roadmap/","title":"Design roadmap","text":"A not-so-concise walkthrough of various IREE features that are in the design process and planned for future versions. A lot of the questions around how the IREE IR is designed and why certain components exist (such as the VM) hopefully become much clearer when seeing where we want to go with the infrastructure we are building (as opposed to where we currently are with our MVP slice). This document is not meant to encompass the entire design of any individual feature and if there's interest please say hi on the iree-discuss mailing list.
- Design roadmap
- Input Dialects
- Quantization
- flow: Data- and Execution-Flow Modeling
- Avoiding Readbacks with flow.stream
- Threading flow.stream through the CFG
- Predication of flow.dispatch
- Deduping flow.executables
- Rematerializing CSE'd Expressions
- Device Placement
- hal: Hardware Abstraction Layer and Multi-Architecture Executables
- Allow Targets to Specify hal.interfaces
- Target-specific Scheduling Specialization
- Buffer Usage Tracking
- Batched Executable Caching and Precompilation
- Target-aware Executable Compression
- Target-aware Constant Compression
- Command Buffer Stateful Deduplication
- Resource Timeline
- Transient Tensor Ringbuffer
- Timeline Semaphores on the Module ABI
- GPU-like CPU Scheduling
- vm: Lightweight Virtual Machine
- Coroutines for Batching and Cooperative Scheduling
- Cellular Batching
- Lowering to LLVM IR
- Improved Type Support
- Indirect Command Buffer/On-Accelerator Execution
"},{"location":"developers/design-docs/design-roadmap/#input-dialects","title":"Input Dialects","text":""},{"location":"developers/design-docs/design-roadmap/#quantization","title":"Quantization","text":"It's assumed that any work related to quantization/compression has happened prior to lowering into IREE dialects. Our plan is to use the proposed Quantization Transforms to achieve both training and inference-time quantization of types in a way that preserves maximum accuracy. IREE will support running with original unquantized floats in all cases, allowing for a smooth on-ramp to quantization and the gains in performance and reduction in model size that come from it.
As future work IREE would like to move beyond these transformation-directed approaches to quantization and interface directly to frontends which have a defined enough type system to represent accurate quantized (and otherwise compressed) computations directly, not relying exclusively on compiler-side type inference transforms.
"},{"location":"developers/design-docs/design-roadmap/#flow-data-and-execution-flow-modeling","title":"flow
: Data- and Execution-Flow Modeling","text":"The flow
dialect is designed to allow us to extract as much concurrency as possible from a program and partition IR into the scheduling and execution domains. Today we have the IR structure and transformation flow in place but have not yet got to the most interesting things such an infrastructure enables. A majority of the largest performance, latency, and memory usage improvements IREE can offer are determined first here and all following lowerings benefit. The fastest code is the code you don't execute and the smallest allocation is the allocation you don't make ;)
"},{"location":"developers/design-docs/design-roadmap/#avoiding-readbacks-with-flowstream","title":"Avoiding Readbacks with flow.stream
","text":"A majority of the readbacks we have today (manifested as flow.tensor.load.*
ops) will be removed when we have an HLO tensor->primitive conversion. There will still be cases when readbacks are required for correctness but they usually fall into a small set of usage patterns. For those that don't this is one place where IREE will warn about performance issues, allowing programs that perform suboptimally but encouraging authors to adjust their input model to enable better behavior. The IREE VM also has specific support for hiding readback latency in an efficient way via coroutines.
The most common case we are currently seeing in the IR is that of dynamic copies where the offsets are dependent on the result of previous computations. Source models may have top-k + gather operations, for example. These appear as a flow.stream
, a flow.tensor.load
, and then another flow.stream
that uses the loaded value for a flow.tensor.update
(or other operation):
%index_tensor = flow.ex.stream.fragment(...) -> tensor<i32> { ... }\n%index = flow.tensor.load %index_tensor : tensor<i32>\n%result = flow.ex.stream.fragment(%arg0 = %index : i32, ...) -> ... {\n %0 = flow.dispatch ...\n %1 = flow.tensor.update %0, %arg2[%index] : tensor<10xf32> -> tensor<1x10xf32>\n ...\n}\n
Today the flow.tensor.update
turns into HAL command buffer transfer operations that must have their offsets known at recording time. This is a limitation of vkCmdCopyBuffer
but not a fundamental limitation of any hardware. In fact several drivers implement copies as small built-in shader programs meaning that we could perform the same expansion here with the right primitives. This would allow, in the above example, both the index to be computed and the tensor to be updated within the same stream to entirely remove the host round-trip.
"},{"location":"developers/design-docs/design-roadmap/#threading-flowstream-through-the-cfg","title":"Threading flow.stream
through the CFG","text":"The current flow.ex.stream.fragment
, as denoted by the ex
perimental tag, is a temporary implementation designed to get the concept of streams lowered to the HAL dialect. For streams to be effective at modeling larger concurrency scopes they need to be able to move across branches in the CFG. This intuitively follows exactly what one would do if recording commands in C:
vkCmdCopyBuffer(cmd, ...);\nif (some_flag) {\nvkCmdBindPipeline(cmd, ..., pipeline_a);\n} else {\nvkCmdBindPipeline(cmd, ..., pipeline_b);\n}\nvkCmdDispatch(cmd, ...);\n
The corresponding flow
IR:
flow.stream.append[%s0](...) {\n flow.tensor.update ...\n }\n %b = arith.cmpi ne %some_flag, ...\n cond_br %b, ^a(%s0), ^b(%s0)\n^a(%s1):\n flow.stream.append[%s1](...) {\n flow.dispatch @pipeline_a, ...\n }\n br ^end(%s1)\n^b(%s2):\n flow.stream.append[%s2](...) {\n flow.dispatch @pipeline_b, ...\n }\n br ^end(%s2)\n^end(%s3):\n ...\n
This allows the entire stream to be lowered into one command buffer without the need for any host round-trips. The conversion into the flow
dialect will walk the CFG and attempt to thread the flow.stream
values through so long as there are no external dependencies.
"},{"location":"developers/design-docs/design-roadmap/#predication-of-flowdispatch","title":"Predication of flow.dispatch
","text":"While the flow.stream
threading through the CFG can remove many of the simpler conditional dispatches there will always be some that will have their execution dependent on the result of prior dispatches. For these a flow.cond_dispatch
will allow a condition to be provided that must be true for the dispatch to actually be performed.
For targets that natively support predication in their command buffers (such as D3D12's ID3D12GraphicsCommandList::SetPredication) this provides a host round-trip-free way of conditionally executing dispatches and transfers. Unfortunately Vulkan support is still lacking, though Nvidia supports the VK_EXT_conditional_rendering extension that exposes the same behavior.
For targets that do not support predication natively it's still possible to emulate predication with indirect dispatches. In this model the workgroup counts normally used to dispatch execution are sourced from another device buffer at the time the dispatch is made instead of sourced from the command buffer at the time the dispatch is recorded. Degenerate dispatches with counts of 0, 0, 0
allow for effective neutering of the dispatch with minimal overhead (vs. the significant penalty of a host round-trip!).
By modeling such predication at the flow
level we are able to lower into the HAL with target-aware predication semantics and fuse indirect dispatch workgroup count calculations into existing dispatches already being performed such that overhead is reduced.
"},{"location":"developers/design-docs/design-roadmap/#deduping-flowexecutables","title":"Deduping flow.executable
s","text":"While still in the flow
dialect, the executables are target-agnostic. This makes simple IR tree diffing a potential solution to deduplication. Since most of the dispatches originate from the same source-language library calls in input frameworks there's a high likelihood of duplication, and depending on when inlining is performed we may have stronger or weaker ability to perform the deduplication. Thanks to the MLIR canonicalization pass (that ensures ops are rearranged into consistent canonical representations) the IR comparisons can be done rather trivially.
"},{"location":"developers/design-docs/design-roadmap/#rematerializing-csed-expressions","title":"Rematerializing CSE'd Expressions","text":"Common subexpression elimination is performed many times during lowering, however there comes a point where the CSE can introduce false dependencies and additional allocations that are otherwise avoidable. For example if a broadcasting operation is CSE'd and then the result is used by two or more operations that are scheduled independently what would have been a relatively cheap lowering of the broadcast to a simple index remapping now becomes an additional dispatch, materialization of an intermediate tensor, and a barrier:
%bcast = \"mhlo.broadcast_in_dim\"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>\n%mul1 = mhlo.multiply %arg0, %bcast : tensor<1024x10xf32>\n// (pretend something here that prevents fusion)\n%mul2 = mhlo.multiply %arg1, %bcast : tensor<1024x10xf32>\n
%bcast = flow.dispatch.region(%cst : tensor<f32>) -> tensor<1024x10xf32> {\n %0 = \"mhlo.broadcast_in_dim\"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>\n return %0 : tensor<1024x10xf32>\n}\n// a barrier will be required here\n%mul1 = flow.dispatch.region(%arg0 : tensor<1024x10xf32>, %bcast : tensor<1024x10xf32>) -> tensor<1024x10xf32> {\n %1 = mhlo.multiply %arg0, %bcast : tensor<1024x10xf32>\n return %1 : tensor<1024x10xf32>\n}\n%mul2 = flow.dispatch.region(%arg1 : tensor<1024x10xf32>, %bcast : tensor<1024x10xf32>) -> tensor<1024x10xf32> {\n %2 = mhlo.multiply %arg1, %bcast : tensor<1024x10xf32>\n return %2 : tensor<1024x10xf32>\n}\n
Instead the broadcast should be rematerialized inside of both dispatch regions as the cost of doing so is significantly less in compute resources and then the intermediate tensor will not be required at all. Though at first it may seem counter-intuitive to undo such a critical optimization as CSE (both to code size and often to compute), here it's something we must carefully balance while looking at the whole system. It gets even more important when considering multi-device execution as the cost of sharing memory and synchronizing may be extremely non-trivial.
"},{"location":"developers/design-docs/design-roadmap/#device-placement","title":"Device Placement","text":"While still within the flow
dialect we have the ability to easily split streams and safely shuffle around operations. Target execution backends can opt into such behavior to ensure that device restrictions such as maximum in-flight memory, maximum scheduling depth, and capabilities are observed. For heterogeneous configurations the intent is that certain operations, dispatches, and streams can be attributed to specify which device categories they should be lowered. The constraint solving that takes place can be provided with generic heuristics (\"big GEMMs go on the accelerator\"), profile-guided databases based on benchmarks, learned traits via ML, etc.
"},{"location":"developers/design-docs/design-roadmap/#hal-hardware-abstraction-layer-and-multi-architecture-executables","title":"hal
: Hardware Abstraction Layer and Multi-Architecture Executables","text":"As the IREE HAL is designed almost 1:1 with a compute-only Vulkan API many of the techniques classically used in real-time graphics apply. The benefit we have by modeling our usage of such a low-level API in IR is that the normal work - some of which is very non-trivial - for managing allocations, tracking resource lifetime, and ensuring proper synchronization/barriers is something we can apply the full force of an offline compiler against.
"},{"location":"developers/design-docs/design-roadmap/#allow-targets-to-specify-halinterfaces","title":"Allow Targets to Specify hal.interface
s","text":"The hal.interface
op specifies the ABI between the scheduler and the device containing the buffer bindings and additional non-buffer data (parameters, shapes, specialization flags, etc). Today a na\u00efve ordering is used uniformly for all targets however it is possible for target backends to opt into providing their own interfaces based on target configuration. The same hal.executable
may have multiple interfaces and the same backend may use one or more. This is useful for when target capabilities may vary at runtime, such as the number of available storage buffer bindings in Vulkan. By exposing a few hal.interface
variants with different binding amounts the Vulkan backend could make better use of the larger number of bindings available at runtime while still providing support for smaller configurations.
Once we have multiple hal.interface
s defined for executables the scheduler needs to emit HAL ops that properly switch between them. By having a canonical form for bindings we can ensure that only the differences between the interfaces will need additional code.
"},{"location":"developers/design-docs/design-roadmap/#target-specific-scheduling-specialization","title":"Target-specific Scheduling Specialization","text":"Though the flow
dialect attempts to fuse as many ops as possible into dispatch regions, it's not always possible for all target backends to schedule a region as a single dispatch. A classic example is algorithms like parallel reduction commonly used on GPUs that may require many dispatches to identical executables, while other algorithms may vary the executables they use based on the input parameters such as shape or the target runtime device support.
By default the flow.dispatch
executable translation to hal.executable
s is performed 1:1 and it is assumed that a single dispatch is required. Extending target backends with scheduling interfaces (enabling them to opt into different scheduling behavior) will allow the backends to emit any number of hal.executable
s and any stream commands (such as additional dispatches or transfers) they may need. This is effectively equivalent to what would be done at runtime only because we are still operating on IR prior to buffer allocation and can use the hal
ringbuffer primitive. Through this we can elide many of the allocations that would otherwise be required at runtime (and the concurrency-limiting false dependencies that usually come along with scratch memory).
Since the algorithm used may vary based on the parameters of the dispatch (such as the shape of the reduction which may be dynamically determined) scheduling specialization may occur even when targeting a single backend. In many cases folding and canonicalization can eliminate the overhead as whether one dynamically computed workgroup size is used instead of another the same IR is present.
"},{"location":"developers/design-docs/design-roadmap/#buffer-usage-tracking","title":"Buffer Usage Tracking","text":"Many explicit hardware APIs require knowing how buffers are used alongside with where they should be located. For example this additional information determines caching policy on buffer accesses (write-through, write-back, etc), visibility of writes across compute units, and the possible MMU properties that may need to be maintained/matched for the buffer. By using the SSA-form value-semantics of the MLIR tensor
as used in the flow
dialect we have complete information of where buffers may be used or at least where they enter or leave regions where we can derive such information.
Analysis passes can run over IR to attribute tensors such that when allocation is performed when lowering to the hal
dialect we do so from an allocator compatible with where the buffer will be used, with memory types chosen based on the potential cost and location of operations performed (write-only on host vs. read-write on host and device, etc), and with usage bits indicating what kind of operations may be performed on the buffer. Many of these are local transformations as most buffers are only live within very small regions such as the flow.stream
encompassing their usage.
Traditional systems need to either use very permissive buffer properties or heuristics that can introduce additional non-trivial overhead when such heuristics are incorrect. For example, OpenGL had several such usage hints that drivers were then able to use but almost no drivers behaved as desired in all cases and it led to additional memory ghosting, copies, readbacks, and unpredictable performance. For almost all uses of the buffers within an IREE invocation we instead can know precisely where and how buffers may need to be moved and do it a minimum number of times if it is required.
"},{"location":"developers/design-docs/design-roadmap/#batched-executable-caching-and-precompilation","title":"Batched Executable Caching and Precompilation","text":"For targets that may require runtime preprocessing of their executables prior to dispatch, such as SPIR-V or MSL, the IREE HAL provides a caching and batch compilation mechanism based on Vulkan's Pipeline Cache.
Today each executable is compiled on-demand and cached only for the process lifetime. Though some drivers may provide their own caching we can make better use of the explicit caching and compilation behavior with the additional information we have in the compiler.
For any given entry point (or group of entry points) into an IREE module we can perform reachability analysis to know which executables may be executed when that entry point is invoked. In this way we can emit pre-invocation compilation checks (similar to an std::call_once
block) that provides all required executables for compilation and allows more efficient compilation through multithreading the compiler invocations. These same compilation caching functions can be exposed and invoked manually by an application to force pre-compilation when it is least likely to impact the user, such as a post-install/first-run step or concurrently while other application features are loading.
We can use zero or more scoped caches for executables within a module. Completely dynamic modules (such as those emitted in eager-mode usage) may avoid the caching overhead entirely, while modules that have several primary usage modes (such as training and inference) may choose to use independent caches for each such mode.
The caches generated can then be retrieved and saved by the hosting application. Upon the next execution the application can provide the caches and if still valid they will be used to avoid compilation.
"},{"location":"developers/design-docs/design-roadmap/#target-aware-executable-compression","title":"Target-aware Executable Compression","text":"An advantage of representing executable binaries in IR after translation is that we can apply various post-compilation compression and minification techniques while still know precisely where the executable will be used. This is extremely important for SPIR-V as it is not designed to be a small at-rest format. Though the biggest lever we have to control generated code size is higher-level deduplication and specialization there will still be a sufficiently large number of executable binaries we will need to embed within the final modules and having targeted approaches for reducing their size beyond just \"gzip everything\" is very powerful.
For example, SMOL-V is a fantastic lossless SPIR-V compression technique that, when coupled with modern dictionary-based compression algorithms, can save significant binary size. As a data point, the SPIR-V corpus SMOL-V uses for testing goes from 4.8MiB of raw SPIR-V to 348KiB of compressed SMOL-V.
Combined with Batched Executable Caching and Precompilation we can easily use shared dictionaries and other cross-artifact compression in a relatively plug-in way.
"},{"location":"developers/design-docs/design-roadmap/#target-aware-constant-compression","title":"Target-aware Constant Compression","text":"It's still an area that needs more research but one goal of the IREE design was to enable efficient target- and context-aware compression of large constants (typically model weights/parameters/embeddings). This may mean reusing existing hardware compression formats on GPUs, ML accelerator-specific formats, or very-low-bit-depth (1-4 bit per value) quantization techniques that cannot be directly used without first decompressing. The inspiration here is formats like Crunch and Basis Universal that perform \"supercompression\", and we may even be able to use these directly as then we can make use of GPU hardware samplers to do the 4-bit to 32-bit decompression, etc.
"},{"location":"developers/design-docs/design-roadmap/#command-buffer-stateful-deduplication","title":"Command Buffer Stateful Deduplication","text":"The IREE HAL - much like Vulkan it is based on - eschews much of the state that traditional APIs have in favor of (mostly) immutable state objects (pipeline layouts, pipeline states, descriptor sets, etc). There are still a few stateful entry points in the API, though, and deduplicating or reordering redundant calls can reduce both IR, API, and execution overhead.
The key place this will have the largest impact is around descriptor set bindings and push descriptors, both of which are state and can have non-trivial setup overhead. A canonicalization for such commands that inspects the target hal.command_buffer
to see if the same state was set prior and code motion to move such commands out of loop bodies when possible would be helpful.
"},{"location":"developers/design-docs/design-roadmap/#resource-timeline","title":"Resource Timeline","text":"A core concept of the IREE scheduler that allows for overlapping in-flight invocations is that of the resource timeline. This identifies module state that can be in use by multiple invocations and assigns timeline milestones denoting when the resource will be in the appropriate state for the current invocation to proceed. Conceptually it is like a epoch-based synchronization mechanism as commonly found in garbage collectors to allow for lock-free asynchronous memory reclamation.
The advantage we have in the IR is that we know both the usage of all resources thanks to buffer usage tracking and the synchronization domains of all resources (in most cases). This allows us to effectively assign one timeline semaphore per writeable resource while in practice having far fewer than 1:1, as for example if two resources are only ever written in the same command buffer only one semaphore is needed to signal the completion of both writes.
By transforming IR to sink all resource reads and writes closest to where the value is used we can enlarge the time windows that can overlap across invocations that may share those resources. This is similar to what out-of-order CPUs do with register renaming/reorder buffers/etc and something we can apply some traditional instruction scheduling techniques to (only here our 'instructions' are entire command buffer dispatches/transfers).
Two degenerate cases of this approach are that of resource indirection (util.ptr<tensor<T>>
) and dynamic resource shapes. In these two cases it may not be possible to continue recording commands even if we are able to ensure execution is appropriately synchronized. This is where indirect dispatch, predication, indirect command buffers, and VM coroutines can all help cover for the times where we are unable to transform away the indirection or emit shape logic without data dependencies.
"},{"location":"developers/design-docs/design-roadmap/#transient-tensor-ringbuffer","title":"Transient Tensor Ringbuffer","text":"(When properly implemented) almost all buffers required during execution never escape the command buffers they are used in or a single VM invocation. We can trivially identify this from the explicit captures of flow.stream
and flow.dispatch
ops and the fact that all tensor types have value-semantics. Only those tensor values loaded-from/stored-to module state or that cross the exported module function boundary need special consideration while almost everything else can live transiently only so long as it is required during execution.
Thanks to this information about buffer usage and lifetime we can use a ringbuffer to store the transient tensor data and other required data reservations such as uniform buffers used to pass dynamic parameters (shapes, flags, etc) into dispatches. This gives the compiler and the application a knob that allows them to control maximum concurrency (by having a very large ringbuffer) or maximum memory usage (by having a minimally small ringbuffer).
Allocating tensors from the ringbuffer does not require sophisticated runtime packing as we can emit IR to calculate required sizes for dynamically shaped tensors. Whether a basic block reserves %sz = arith.constant 42 : index
bytes or %sz = std.muli %cst, %dyn_dim : index
bytes doesn't materially change how the allocations are performed. Since almost all usage involves simple write head bumps there is no need for ahead-of-time memory planning or large fixed allocations, and since no buffer within the ringbuffer can alias we can have coarse (read: low overhead) guarantees about the availability of certain regions of the ringbuffer (\"when this event is signaled all prior ringbuffer writes have completed\").
Usually any planning we may want to perform can be done in IR via code motion. For example applying traditional algorithms used to reduce register pressure will help us attain narrower live windows within the ringbuffer leading to a larger number of in-flight operations for the same ringbuffer memory usage.
We may end up using both a classical ringbuffer and a variant known as the bip buffer because it is better for descriptor set utilization (as we can provide many dispatch parameters with a single base offset bound once at the beginning of a region).
"},{"location":"developers/design-docs/design-roadmap/#timeline-semaphores-on-the-module-abi","title":"Timeline Semaphores on the Module ABI","text":"Functions calls made across modules (either from C++ into the VM, VM->VM, or VM->C++) should be able to define timeline semaphores used to wait and signal on the call. We can do this by making all exports automatically have the semaphores and then make invocations populate them if they were not provided by the caller. In this way we can allow multiple invocations of exported functions to chain naturally with internal asynchronous workloads, turning most IREE invocations into just recording of command buffers that can never block.
When combined with VM coroutine support we even have the ability to interleave any required host execution between the wait and signal semaphores provided such that the caller never knows on which device execution is taking place. It's still possible to provide synchronous wrappers that emulate blocking behavior but by having the core system designed around a single system-supported primitive we avoid the need for additional things like interrupt watchdog threads, implicit blocking, and other pitfalls.
"},{"location":"developers/design-docs/design-roadmap/#gpu-like-cpu-scheduling","title":"GPU-like CPU Scheduling","text":"One approach to using multiple cores on a CPU is to perform interior parallelization of operations such as OpenMP or library-call-based custom thread pools (gemmlowp). This works when each individual operation is relatively costly vs. potential pipeline bubbles caused by work spinning down near the end of an operation and spinning up at the beginning of the next.
IREE is designed to handle many more workloads - some of which have very narrow shapes but very deep pipelines (like search algorithms) - such that the above approach of multithreading within ops becomes a bottleneck. These workloads are traditionally very poorly handled by frameworks and issues with oversubscription, pipeline stalls, and suboptimal system schedulers (such as on Android) can lead to more time being spent thrashing about than actually executing real work.
The approach we take here is to treat the cores of a CPU as if they were computation units on a GPU, each able to perform some set of heterogeneous work independent of other units. This means that the concurrency we are trying to model at the flow
level and communicate to the runtime via the hal
that explicitly states which dispatches can overlap and the size of the workgroups can trivially be used to distribute this work over many cores exactly as a GPU would do it. Integration with library calls that may require their own threading (such as Ruy) requires that they be able to use the IREE thread pool instead of their own.
In this way we can avoid pipeline bubbles and other latency-inducing unpredictable scheduling. This does not mean that we treat individual units of work at the same scale as we would for GPUs, but instead that we tile and have one or more processing units that allows us to work on those tiles. Whether the tile size is defined by a library call contract, heuristics, or empirically is TBD, but expect workgroup sizes in the thousands to millions of invocations vs. normal GPU workgroup sizes in the dozens to hundreds of invocations.
To achieve this style of scheduling efficiently we'll likely use something like marl as the scheduler. Marl provides cross-platform low-overhead fibers and is compatible with this style of scheduling as it was built for the Swiftshader software rasterizer.
Even if IREE was only targeting CPUs the assertion is that we would still want to schedule this way and it's only an incidental benefit that if building for heterogeneous targets the scheduling code may be shared (just with a different divisor for workgroup count calculations).
"},{"location":"developers/design-docs/design-roadmap/#vm-lightweight-virtual-machine","title":"vm
: Lightweight Virtual Machine","text":"The VM is designed as a dynamic linkage ABI, stable bytecode representation, and intermediate lowering IR. Many of the optimizations we can perform on it will benefit all use cases (such as when lowering to LLVM IR) by allowing higher-level program transformations around synchronization that are difficult to perform on arbitrary LLVM IR.
"},{"location":"developers/design-docs/design-roadmap/#coroutines-for-batching-and-cooperative-scheduling","title":"Coroutines for Batching and Cooperative Scheduling","text":"One of the largest features currently missing from the VM is coroutines (aka user-mode fiber scheduling). Coroutines are what will allow us to have multiple in-flight invocations into a module - some of which may be waiting on external events - without the need for complex multithreading logic or state machine machinations.
In many cases once semaphores are exposed to callers we will not need to yield in the VM. The user will call into the module with provided semaphores, the work to perform will be recorded to one or more command buffers and submitted to the device, and then control will return to the caller immediately.
In cases requiring host readbacks that we were not able to remove, however, additional VM code may need to run prior to when the final semaphore is signaled. To preserve the asynchronous interface and immediate execution guarantees the compiler can emit explicit yield points (vm.yield
) that are known-good locations for yielding (such as most resources not required after the yield having been flushed/discarded, partial synchronization scope availability if other work may be able to execute concurrently irrespective of the yielded coroutine, etc).
When the VM encounters the yield at runtime it will suspend the coroutine until a defined condition is met. Many coroutines can be in various states at any given time and - thanks to the resource timeline - can still be memory safe. For example if two stateless invocations are made with a common wait semaphore both can be recorded and submitted without waiting on each other. If there is internal module state accessed the invocations are implicitly ordered by invocation order (similar to what Vulkan calls API order) based on internal resource timeline semaphores.
Waking the coroutines can be performed by an application-provided callback in the case of the application already having a periodic event which is doing bookkeeping (such as frame end callbacks when rendering or Looper idle events on Android), giving direct control over the frequency and location which IREE utilizes to perform additional work. A helper will be provided as well that runs a dedicated IREE thread to do this, but the expectation is that applications can often do a better (and importantly more predictable) job.
By utilizing coroutines IREE will have a way to fill traditional pipeline bubbles even with execution from the same module (let alone across modules) in the situation where host readbacks or other logic is required. This increases overall throughput and utilization while reducing host wakeups as many coroutines can be processed at once to submit new work to the device queues, though it does not help reduce per-invocation latency.
External code such as the HAL implementation or user ops may provide the wait handles used for continuation. For example, the HAL can expose a function that yields and wakes only when one or more timeline semaphores reach their target values:
// submit work\nhal.device.yield %semaphore4 >= %sem4_target, %semaphore5 >= %sem5_target\n// continue here, possibly much later in time\n
"},{"location":"developers/design-docs/design-roadmap/#cellular-batching","title":"Cellular Batching","text":"Though coroutines help throughput there is a way we've found to reduce latency that's been documented as cellular batching. This same technique has been implemented in prior internal systems and is one of the motivating design goals for IREE's creation. The core idea is to identify small uniform work that can be partitioned and scheduled greedily such as to enable batching or reduce associated invocation costs (such as refreshing accelerator SRAM/caches with new parameters). This usually manifests as finding large GEMM/GEMV operations using the same fixed parameters and either dynamically increasing the batch size by adding the waiting work (without deferring the actual execution time) or sequencing them back to back to ensure better cache utilization. Which approach is taken depends on any data dependencies that may be present (such as LSTM state feedback edges).
With the foundation of coroutines in IREE it's possible to yield execution at any given point - including during command buffer recording - and wake on specific conditions. A majority of the logic can be built into the module itself with very little need for runtime machinery, as shared VM variables can be used to track pending work across invocations (even from different parts of the program) and flush based on logic wholly controlled by the user or compiler (such as count/max time latency/etc limits). This allows for the large variety of scheduling behavior various applications may want to use, ranging from a zero-latency batch-only-within-this-invocation to a Nagle's Algorithm-esque time or limit based behavior or even some learned model-specific windowing.
Design work is still required on how to represent this in IR but the current thought is to model the regions in which deferred execution is possible and beneficial and allow during lowering to the VM additional transformations. This is similar to how the async-await behavior works in C# where the async keyword is just sugar that expands to additional generated helper utilities.
A simple strawman representation for sequential dispatch may look like:
hal.scheduling_policy @defer_policy {\n // max time, max count, max live memory, etc\n}\n...\nhal.command_buffer.dispatch.deferred @defer_policy, @dispatch, ...\n// vm.yield added here during lowering\n
There are many cases to explore and as cellular batching can have performance benefits of several orders of magnitudes it'll be one of the primary areas of research in the long-term.
"},{"location":"developers/design-docs/design-roadmap/#lowering-to-llvm-ir","title":"Lowering to LLVM IR","text":"For scenarios where dynamic module loading is not required and entire modules can be compiled into applications we can lower the VM IR to LLVM IR within MLIR's transformation pipeline. Instead of embedding vm.call
ops that are dispatched at runtime to things like the HAL we can instead lower to llvm::CallInst
to runtime-resolved function pointers. This still enables all of the flexibility of heterogeneous/runtime-determined devices, pluggable diagnostics, and backend composition without any need for FlatBuffers or the VM bytecode interpreter.
The VM was designed to make such a lowering easy and the C-style struct-based function pointer registration for runtime modules was designed to make emitting code that used it fairly robust even when linked in dynamically such as when embedded in shared objects.
An extension of this is what we've been calling 'runtimeless mode', where the IREE VM linkage code is statically linked into the binary alongside the generated module LLVM IR. If only a single HAL backend is linked in then (with some build-fu) we should be able to get call devirtualization to reduce code size to precisely the functionality used by the module.
"},{"location":"developers/design-docs/design-roadmap/#improved-type-support","title":"Improved Type Support","text":"Currently the VM only supports two types: i32
and vm.ref<T>
. This is an intentional limitation such that we can determine what is really needed to express the scheduling we perform, with the idea being that such a limited model will make it easier to use techniques like indirect command buffers to compile the VM itself to an accelerator executable that dispatches work without host involvement.
As we port more models we may find a few primitives that are worth bringing into the VM design such that it's worth potential complications to future porting. These include types like f32
(for simple float calculations/comparisons), list
/dict
(easier python compatibility), and vector<4xf32>
(for simple inline calculations that are not worth dispatch overhead/synchronization).
"},{"location":"developers/design-docs/design-roadmap/#indirect-command-bufferon-accelerator-execution","title":"Indirect Command Buffer/On-Accelerator Execution","text":"Though IREE will use many different tricks such as predication to build deep pipelines there is still the requirement that the command recording and submission happens on the host CPU. Though the cost of this in terms of latency and power use can be minimized by coalescing and timelines there is still the possibility of non-trivial roundtrips being introduced that limit performance. For particular applications like low-power always-on compute or where there is significantly branchy behavior (such as search algorithms) it is important that the decision making logic as to what is dispatched runs as close to real-time as possible within the execution pipeline.
The IREE VM is designed to be runnable on-device in a secure and cooperative way (no pointers, indirect buffer handles to allow for memory space rearrangement op-to-op, deterministic execution and explicit yield points, etc).
The recent efforts to bring indirect command buffers to Vulkan and Metal's Indirect Command Buffers (that both derive inspiration from NV_command_list) are one such target for this. Either by lowering the VM IR to LLVM IR or SPIR-V, by a special conversion to target-specific forms, or by actually executing the VM bytecode directly on-device (it's ~1000 LoC) we should be able to prototype what full on-device usage is like. Even if only some VM functions the compiler deems useful to schedule on the device are used and the rest run on the host (particularly those functions calling imported functions) some of the most costly logic that creates tight coupling of the host and device scheduling can be limited.
"},{"location":"developers/design-docs/function-abi/","title":"Function ABI","text":"Note
Authored December, 2019
Updated August, 2021
A key job of the IREE compiler and runtime is capturing function call semantics from the originating system and providing mechanisms so that invocations can be performed in as similar way as possible in various target languages. In general, this requires additional metadata on top of the raw characteristics of a function. Where possible, this is done by attaching attributes to a function.
iree.abi
: JSON encoded description of the function's calling convention.
"},{"location":"developers/design-docs/function-abi/#v1-abi","title":"V1 ABI","text":"This is the default ABI supported by the IREE VM invocations. It attempts to provide a default calling convention that can be used without further reflection metadata but which may be enhanced with it.
It natively allows monomorphic functions to be exported where arguments and results are composed of the following types:
"},{"location":"developers/design-docs/function-abi/#value-types","title":"Value Types:","text":" - Byte aligned integer type (i8, i16, i32, i64)
- Floating point value (f16, f32, f64)
"},{"location":"developers/design-docs/function-abi/#reference-types","title":"Reference Types:","text":" -
ND-Array buffers of Value Types:
- Simple: Packed, C-layout
- Strided: Arbitrary layout with strides (future)
-
String (byte arrays)
-
Opaque reference object
"},{"location":"developers/design-docs/function-abi/#sequence-types","title":"Sequence Types:","text":" - Tuples: fixed length lists where each position has its own type bound
- Homogenous list: lists of arbitrary size where a single type bound applies to all elements
The intent with these low level types is that calling conventions can be synthesized to bind arbitrary high level, domain/language specific signatures to these types, possibly by way of additional reflection metadata.
"},{"location":"developers/design-docs/function-abi/#representations","title":"Representations:","text":"The above are all representable with native constructs in the VM:
-
ValueType:
- Runtime:
iree_vm_value
- Compile Time: primitive MLIR integer/floating point types
-
Simple ND-Array Buffer:
- Runtime:
iree_hal_buffer_view
- Compile Time:
tensor<>
-
String:
- Runtime:
iree_vm_list
containing i8
- Compile Time:
!util.list<i8>
-
Tuple:
- Runtime:
iree_vm_list
of variant - Compile Time:
!util.list<?>
- Note that these are statically type erased at the boundary.
-
TypedList (homogenous):
- Runtime:
iree_vm_list
of T
- Compile Time:
!util.list<T>
"},{"location":"developers/design-docs/function-abi/#extended-type-calling-conventions","title":"Extended Type Calling Conventions","text":"While the above features of the native ABI may be sufficient for direct use by various programs, many programs and callers will need to represent various higher level types, consistently mapping them to the above facilities. This section describes calling conventions for various higher level types which do not map 1:1 to the above. Not all source language types are representable, and extending these calling conventions (and the fundamental types above) is demand driven.
All of these calling conventions presume that the arity of the arguments/results of the raw function matches the user-level function, meaning that the calling convention is specified per argument/result. Higher-level whole function transformations may also exist for some domains but are outside of the scope of this specification.
"},{"location":"developers/design-docs/function-abi/#structure","title":"Structure","text":"A Structure
is a common enough entity to have a dedicated calling convention. In C-like languages, this may just be a struct
. In Python, it is typically a dict
with an associated schema providing a name and type bound for each of its slots. In both, its slots are of fixed arity.
In this convention, such a structure is represented as a Tuple
in the native calling convention (i.e. !util.list
of variant type). The order of the elements of the tuple are the natural order of the structure, where that is either:
- For a C-like system where order is determinate, it is the order of declaration.
- For a name-based system (i.e. bind to
dict
) where no order is defined, the natural order will be the lexically sorted order of the keys.
"},{"location":"developers/design-docs/function-abi/#string","title":"String","text":"Most languages interop between byte arrays (i.e. the native ABI String
type) by way of applying an encoding. Such strings are just a sequence of bytes (i.e. !util.list<i8>
).
"},{"location":"developers/design-docs/function-abi/#typed-list","title":"Typed List","text":"High level lists which all share the same type bound are represented as a TypedList
in the native ABI.
"},{"location":"developers/design-docs/function-abi/#ndarray-of-reference-types","title":"NDArray of Reference Types","text":"NDArrays of reference types are considered separately from those of value types. Internally, the code generated for them is completely different from what gets generated for numeric based arrays (i.e. has ref-counting, ownership semantics, non-POD, etc). These types are permitted for completeness, not necessarily performance: by nature they are already indirected and have overheads.
In the native ABI, these are represented as a composite tuple type (i.e. today a list since sugar for tuple is not yet defined): !iree.tuple<!util.list<T>, !util.list<index>>
. The first element of the tuple is the list of values, packed with a C-Layout and the second element is the list of dimension sizes.
"},{"location":"developers/design-docs/function-abi/#reflection","title":"Reflection","text":"Additional reflection metadata may be encoded in a custom JSON form, providing additional typing hints for arguments and results. If present, this will be a reflection attribute with key d
, containing a serialized JSON object.
The JSON object contains:
a
(array): List of type records for each argument. r
(array): List of type records for each result.
Type records are one of:
-
A string naming a primitive type:
i[0-9]+
: Integer type with given bit width f[0-9]+
: IEEE floating point type with given bit width bf16
: BFloat16
-
JSON null
: A null reference value
-
\"unknown\"
: An unknown/unmapped type
-
An array, interpreted as a tuple describing a compound type.
"},{"location":"developers/design-docs/function-abi/#compound-type-tuples","title":"Compound type tuples","text":"A compound type tuple has a type identifier as its first element, followed with type specific fields:
[\"named\", \"key\", {slot_type}]
: Associates a name with a slot. This is used with the root argument list to denote named arguments that can be passed positionally or by keyword. [\"ndarray\", {element_type}, {rank}, {dim...}]
: For unknown rank, the rank
will be null
and there will be no dims. Any unknown dim will be null
. [\"slist\", {slot_type...}]
: An anonymous structured list of fixed arity and slot specific types. If there are gaps in the list, empty slots will have a null
type. [\"stuple\", {slot_type...}]
: Same as slist
but some languages differentiate between sequences represented as lists and those represented as tuples (read-only lists). [\"sdict\", [\"key\", {slot_type}]...]
: An anonymous structure with named slots. Note that when passing these types, the keys are not passed to the function (only the slot values). [\"py_homogeneous_list\", {element_type}]
: A Python list of unknown size with elements sharing a common type bound given by element_type
.
"},{"location":"developers/design-docs/invocation-execution-model/","title":"Invocation execution model","text":"Authored June, 2022
This documents the behavior of the user-visible invocation mechanism IREE uses to schedule program execution. Internally IREE uses a very similar modeling for tracking its internal workloads and in kind carries that down to target APIs and devices that themselves use a very similar model. The intent is to expose the device model in an abstracted way that allows for the full capture and communication of the execution intent to be propagated to the hardware that executes it. Though here we focus on the user-visible portion of execution there is really only one \"IREE execution model\" and the entire stack follows the same design. At its core this design is just an instantiation of an out-of-order execution algorithm such as those originating from the 1960's.
"},{"location":"developers/design-docs/invocation-execution-model/#glossary","title":"Glossary","text":"stateDiagram\n state UserApplication {\n direction BT\n state Context0 {\n ModuleA-->ModuleAState0\n ModuleB-->ModuleBState0\n }\n state Context1 {\n ModuleA-->ModuleAState1\n ModuleB-->ModuleBState1\n ModuleC-->ModuleCState1\n }\n state ModuleA {\n @func1\n @func2\n }\n state ModuleB {\n @func3\n @func4\n }\n state ModuleC {\n @func5\n }\n }
"},{"location":"developers/design-docs/invocation-execution-model/#program","title":"Program","text":"An IREE program is a collection of modules instantiated in a context from which invocations can be made. Invocations are ordered on a user-controlled timeline that uses fences to define the execution order requirements to enable out-of-order execution. A hosting user application may have multiple programs or multiple instances of the same program available and running invocations at a time across multiple timelines.
"},{"location":"developers/design-docs/invocation-execution-model/#module","title":"Module","text":"Modules define executable code and data that can be loaded, linked, and run \u00e0 la ELF shared libraries. Modules may be implemented as C/C++, generated bytecode or C sources from the IREE compiler, or any other mechanism that can run code and implement the iree_vm_module_t
interface. Modules on their own are read-only and can be reused across many contexts.
Traditional ML runtimes would use a model (graph, etc) as their module representation. In IREE everything is a module including runtime subsystems like the HAL and user-provided custom code. This ensures that anything IREE can do can be externalized and replaced by users without needing to modify the core IREE code.
"},{"location":"developers/design-docs/invocation-execution-model/#context","title":"Context","text":"A collection of modules are linked and instantiated in a context. Each context operates independently and carries its own copies of mutable module state. Invocations execute within a context scope and hosting applications coordinate across contexts as required. Contexts are cheap to create (microseconds) and retain (~100B + program state) such that users can decide how to manage them based on their scenario.
Traditional ML runtimes would call these \"sessions\" but in IREE everything is a program. Whether the program is stateful or stateless and how the program is invoked is up to the program author.
"},{"location":"developers/design-docs/invocation-execution-model/#invocation","title":"Invocation","text":"An invocation represents a single call into a module exported function using the program state stored in a context. Users can decide whether to perform synchronous blocking invocations or asynchronous non-blocking invocations per-call; the behavior of the invocation is independent from the target function and a user program may contain a mix of both.
As an example a user program may synchronously invoke a @query_output_shapes
function to preallocate storage for an asynchronous @execute_in_place
function to write into.
"},{"location":"developers/design-docs/invocation-execution-model/#timeline","title":"Timeline","text":"A timeline represents the observable order of execution. Users define their own timelines and communicate them to IREE via fences. Timelines do not match up with the order of invocations unless the user dictates they must by way of fences. In the absence of fences all invocations execute in an arbitrary order and they may execute concurrently just as threads in C with no barriers.
Each timeline can be thought of as an independent clock domain that may operate asynchronously at its own frequency with only fences acting to tie separate timelines together. This directly mirrors real hardware constraints like clock domain crossing as each execution scope (thread on core, driver calls to queues, kernel queues to device queues, device queues to compute unit queues, etc) is naturally operating at different rates and well-designed systems must tolerate that variability.
"},{"location":"developers/design-docs/invocation-execution-model/#fence","title":"Fence","text":"A fence is a specific point of progress in one or more timelines acting as a barrier, fork, or join point. Fences only guard execution ordering and not any particular resources though users can use them to guard resources by defining when in time the resources are available for use.
Waits on fences are wait-until operations specifying that the timeline must reach at least a specific point. This allows for flexible reordering and deferral of execution as executors can pull forward scheduled work based on policy (run similar work together, etc).
"},{"location":"developers/design-docs/invocation-execution-model/#hardware-abstraction-layer-hal","title":"Hardware Abstraction Layer (HAL)","text":"The HAL is an optional feature of IREE that is used to provide a consistent interface across execution resources. It is used internally by IREE programs to define and submit work to devices and signal across them but may also be used by users to directly interface with hardware in a compatible way. Exposing the HAL API allows for users to efficiently manage their data and custom execution without expensive marshaling. Most users will only interact with HAL buffers as they work with their data but more advanced integrations can directly insert IREE into existing device contexts to transparently share scheduling and resources or insert their own code into IREE to pipeline custom execution.
"},{"location":"developers/design-docs/invocation-execution-model/#execution-by-timelines","title":"Execution by Timelines","text":"NOTE: this defines an execution scheme that IREE supports but a user may use one or more such schemes in a single program - just as a C application may mix single- and multi-threaded code within itself for different components.
The combination of invocations, timelines, and fences allows users to provide future knowledge to lower layers of the system by declaring their availability requirements and the lower layers are then able to execute the work out-of-order so long as the specified requirements are met. The primary goal when designing for such a system is to specify as few requirements as possible in order to provide the maximum amount of scheduling freedom to the implementation.
This makes timelines one of the most critical components of the interface. The purpose of invocations is to schedule work against one or more timelines and what happens within the invocations is an implementation detail of the program.
"},{"location":"developers/design-docs/invocation-execution-model/#sequential-execution","title":"Sequential Execution","text":"Here we say \"a user invokes a function to schedule execution on a timeline\" vs. a more traditional \"a user invokes a function to execute work\" and this manifests in the IREE ABI as invocations taking fences defining specific points on timelines of which the user may observe:
# Fences are effectively just timeline + integer tuples and are cheap to hold.\nwait_fence = my_timeline.at(t)\nsignal_fence = my_timeline.at(t+1)\n# Schedule work against the timeline.\n# All work prior to t must complete before execution can occur and after\n# execution the timeline will advance to t+1.\nasync_invoke(@some_fn, wait_fence, signal_fence)\n# The invocation may have returned immediately after the work was scheduled;\n# until the fence is reached no actual execution may have occurred. To\n# synchronize the user code with the timeline the user can block until the fence\n# is reached.\nsignal_fence.wait()\n
To the user this would appear as:
sequenceDiagram\n User->>@some_func: invoke\n activate @some_func\n @some_func->>User: ;\n @some_func-->>@some_func: wait t\n @some_func-->>User: signal t+1\n deactivate @some_func
This means from the user's perspective the actual operations performed by the invocation are not important: the only thing the user can observe in this situation is when the timeline reaches t+1
as they specified. Whether internally the invocation needs many steps to complete as there are timelines internal to the program is an implementation detail. Actual execution may look like this:
sequenceDiagram\n User->>@some_func: invoke\n activate @some_func\n @some_func->>User: ;\n @some_func->>@some_func: ;\n @some_func-->>Device A: ;\n Device A-->>Device A: wait t\n activate Device A\n @some_func->>@some_func: ;\n @some_func-->>Device B: ;\n activate Device B\n @some_func->>@some_func: ;\n Device A-->>@some_func: ;\n deactivate Device A\n @some_func->>@some_func: ;\n @some_func-->>Device B: ;\n activate Device B\n deactivate @some_func\n Device B-->>User: signal t+1\n deactivate Device B\n deactivate Device B
Even in this simple user-synchronous example the system is able to internally run several concurrent timelines with a minimal number of synchronization points and the lowest possible latency as the user is immediately notified without any intermediate layers needing to be woken, scheduled, executed, and passed on.
"},{"location":"developers/design-docs/invocation-execution-model/#pipelined-execution","title":"Pipelined Execution","text":"The true power of timelines comes from the ability to pipeline execution. Users define DAGs with fences and can construct arbitrarily complex execution topologies whether from the same program or across multiple programs:
stateDiagram\n direction LR\n state fence0 <<fork>>\n [*] --> fence0\n fence0 --> @fn0\n state fence1 <<fork>>\n @fn0 --> fence1\n fence1 --> @fn1\n fence1 --> @fn2\n state fence2 <<join>>\n @fn1 --> fence2\n @fn2 --> fence2\n @fn3 --> fence2\n fence0 --> @fn4\n @fn4 --> fence2\n fence2 --> [*]
This is a simple extension to the synchronous example using the same primitives:
# Timeline is defined by the user.\nfence_a = my_timeline.at(t)\nfence_b = my_timeline.at(t+1)\nfence_c = my_timeline.at(t+2)\n# Invocations are launched using the fences and may not complete immediately.\nasync_invoke(@fn0, fence_a, fence_b)\nasync_invoke(@fn1, fence_b, fence_c)\nasync_invoke(@fn2, fence_b, fence_c)\nasync_invoke(@fn3, None, fence_c)\nasync_invoke(@fn4, fence_a, fence_c)\n# Blocking here but no need to; could pass fence_c on to other invocations.\nfence_c.wait()\n
The critical point of this being that the user never had to wait for any particular invocation to complete before being able to schedule more work against the timeline, even if those invocations could themselves not complete synchronously. The lower layers of the system are able to fully model the execution as early as possible without needing to communicate (and importantly synchronize) with the user.
"},{"location":"developers/design-docs/invocation-execution-model/#io","title":"I/O","text":"Users define the semantics of their programs themselves. For example if the user knows the precise shape of an output buffer they can preallocate the buffer and pass it in. If they don't know they can decide to factor out the shape calculation and invoke that synchronously in order to compute the shape, allocate the appropriately sized buffer, and pass that in. Or they could decide to only deal with synchronous invocations and return a program-allocated buffer view with the appropriate shape in their callback. IREE does not dictate the design of user programs and as such enables mixed stateful/stateless, asynchronous/synchronous, and arbitrary scheduling models (enqueue/drain, windowing, etc).
Inputs and outputs to invocations are provided by the user as primitive values (integers, floats, etc), supported builtin types (lists, byte buffers/strings), custom user types, and HAL types like buffers or buffer views (buffers + shape and type metadata). One or more wait fences can be used to order invocation access to one or more inputs by indicating that the resource is not available until a certain fence is reached. Similarly one or more signal fences can be used to order subsequent access to the resources by indicating the advancement of the timeline when they are available.
# wait_fence_a must be reached before buffer_a and buffer_b can be read.\n# wait_fence_b must be reached before buffer_c can be read.\n# buffer_a will be ready to read when signal_fence_a has been reached.\nasync_invoke(@fn,\n (wait_fence_a, buffer_a, buffer_b),\n 42, # no ordering required on value types\n (wait_fence_b, buffer_c),\n (signal_fence_a, buffer_a))\n
The above example demonstrates an in-place operation on buffer_a
. It's also possible for invocations to return values:
result = invoke(@sum, 1, 2) # = 3\n
When executed asynchronously a callback or any construct that can be built upon them (like promises/futures) can receive the results:
def my_callback(result):\n print(result) # 3\nasync_invoke(@sum, 1, 2, my_callback)\n
"},{"location":"developers/design-docs/invocation-execution-model/#stream-ordered-allocations","title":"Stream-ordered Allocations","text":"Invocations generally have only a few KB of overhead and pipelined command buffers take only a small amount more. Storage buffers, however, can easily take hundreds of MB per invocation for I/O and transient state. This compounds as program usage becomes more complex or multiple programs are involved. IREE supports traditional host-ordered allocations (\u00e0 la malloc/free) for persistent buffers like large constants/read-only data or user-managed ringbuffers. Stream-ordered allocations are also supported to allow for pooled buffer reservations that can be allocated in a scheduled order alongside program execution.
For more detailed examples see the CUDA blog posts describing their implementation: part 1, part 2.
With stream-ordered allocations each allocation and deallocation operation is scheduled with wait and signal fences just as with invocations. This allows these allocation operations to execute remotely on device without host program involvement. For example, scheduling alloca0
/dealloca0
and alloca1
/dealloca1
interleaved with the function execution allows for the transient memory required for executing @fn0
to remain uncommitted until immediately before it is executed, committed during execution, and then decommitted immediately after execution. The memory required for passing data from @fn0
to the subsequent @fn1
and @fn2
survives until after they have completed executing before being decommitted. By using the same scheduling primitives as execution the allocation topology can be as arbitrarily complex as the invocation topology:
stateDiagram\n direction LR\n state fence0a <<fork>>\n [*] --> fence0a\n state fence0b <<fork>>\n fence0a --> alloca0\n fence0a --> alloca1\n alloca0 --> fence0b\n alloca1 --> fence0b\n fence0b --> @fn0\n state fence1a <<fork>>\n @fn0 --> fence1a\n state fence1b <<fork>>\n fence1a --> dealloc0\n dealloc0 --> fence1b\n fence1b --> @fn1\n fence1b --> @fn2\n state fence2a <<join>>\n @fn1 --> fence2a\n @fn2 --> fence2a\n state fence2b\n fence2a --> dealloc1\n state fence2b <<join>>\n dealloc1 --> fence2b\n fence2b --> [*]
When operating in this way allocations from the host-perspective are just reservations for a slice of pooled storage that will be committed at some point in the future. Likewise deallocations from the host-perspective release the prior reservation and schedule the paired decommit at some point in the future. Scheduling N sequential invocations thus requires only enough committed storage for a single invocation in addition to the I/O (unless that too is stream-ordered).
This scheduling behavior allows for both minimal peak memory consumption regardless of the number of programs or invocation pipeline depth and sharing of committed storage across programs: the memory consumption of a program at rest is near zero when stateless and the sum of all state when stateful. Target devices that natively support stream-ordered allocations (like CUDA) can even share pools across processes.
The other feature provided, in combination with the guaranteed forward progress of fences, is that so long as the memory pool can service a single request, execution can still continue even when constrained. A device can serialize two independent invocations each requiring 400MB of transient memory when the system only has 512MB available, with no user-visible impact besides increased latency. This does require the user to ensure they schedule work that is possible to run, or to rely on the target system having paging in order to lighten the strictness of the pool quotas.
Stream-ordered allocations performed by the user for invocation inputs can be declared as transferred to the program. This allows the program to eagerly deallocate or reuse the input storage while still preserving the internal scheduling requirements of the program.
"},{"location":"developers/design-docs/invocation-execution-model/#internal-state","title":"Internal State","text":"A stateful program may contain internal timelines that it uses to order its own execution. Take for example this simple stateful program:
class TrivialKernel(Program):\n _x0 = Program.export_global(x_type)\n def get(self):\n return self._x0\n def set(self, x=x_type):\n self._x0 = x\n def matmul(self, x=y_type):\n self._x0 = self._matmul(x, self._x0)\n @Program.kernel\n def _matmul(x, x0):\n return jnp.matmul(x, x0)\n
Each invocation of matmul
needs to be executed in-order with prior invocations as there is a data dependency established on self._x0
. Attempts to get
or set
must also be sequenced correctly with the matmul
invocations. A basic usage like this:
m = TrivialKernel()\nm.set(input)\nm.matmul(a)\nm.matmul(b)\nm.matmul(c)\noutput = m.get()\nprint(output) # implicit wait\n
Would be executed as:
sequenceDiagram\n activate User\n User->>TrivialKernel: @set(input)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n activate Device\n TrivialKernel->>User: ;\n User->>TrivialKernel: @matmul(a)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n User->>TrivialKernel: @matmul(b)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n User->>TrivialKernel: @matmul(c)\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n User->>TrivialKernel: @get()\n activate TrivialKernel\n TrivialKernel-->>Device: ;\n deactivate TrivialKernel\n TrivialKernel->>User: ;\n Device-->>Device: ;\n deactivate User\n User->>User: (wait)\n Device-->>User: (signal)\n deactivate Device\n activate User\n User->>User: print(output)\n deactivate User
Note that although the user provided no timeline of their own execution is still ordered correctly due to the internal timeline constructed by the program. If the user wanted to also pipeline execution with another program they could do so by providing their own fences.
"},{"location":"developers/general/contributing/","title":"Contributing to IREE","text":"We'd love to accept your patches and contributions to this project.
Please file issues or reach out on any of our other communication channels before doing substantial work; this will ensure that others don't duplicate the work and that there's a chance to discuss any design issues.
"},{"location":"developers/general/contributing/#developer-policies","title":"Developer policies","text":""},{"location":"developers/general/contributing/#code-of-conduct","title":"Code of conduct","text":"This project follows the OpenXLA Code of Conduct.
"},{"location":"developers/general/contributing/#contributor-license-agreement","title":"Contributor License Agreement","text":"Contributions to this project must be accompanied by a Contributor License Agreement (CLA). Head over to https://cla.developers.google.com/ to see your current agreements on file or to sign a new one.
- You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project.
- You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again.
"},{"location":"developers/general/contributing/#coding-style-guidelines","title":"Coding style guidelines","text":"Most of the code style is derived from the Google Style Guides for the appropriate language and is generally not something we accept changes on (as clang-format and other linters set that for us). The C++ compiler portion of the project follows the MLIR/LLVM style guide.
Improvements to code structure and clarity are welcome but please file issues to track such work first. Pure style changes are unlikely to be accepted unless they are applied consistently across the project.
Tip - code formatters and lint scripts Formatters like clang-format
(C/C++) and Black (Python) can be set to run automatically in your editor of choice.
The script at build_tools/scripts/lint.sh
can also be used to run the full suite of lint checks.
"},{"location":"developers/general/contributing/#code-reviews","title":"Code reviews","text":"All submissions, including submissions by maintainers, require review. We use GitHub pull requests (PRs) for this purpose. Consult GitHub Help for more information on using pull requests.
- Please keep PRs small (focused on a single issue) to make reviews and later culprit-finding easier.
- You may see trusted core contributors bending this rule for project maintenance and major subsystem renovation. If you feel like the rules aren't working for a certain situation, please ask as we bias towards pragmatism for cases that require it.
"},{"location":"developers/general/contributing/#github-actions-workflows","title":"GitHub Actions workflows","text":"We use GitHub Actions to automatically build and test various parts of the project.
- Most presubmit workflows will only run automatically on PRs if you are a project collaborator. Otherwise a maintainer must approve workflow runs. If you are sending code changes to the project, please ask to be added as a collaborator, so that these can run automatically.
- It is generally expected that PRs will only be merged when all checks are passing. In some cases, pre-existing failures may be bypassed by a maintainer.
Tip - adjusting workflow behavior Some workflows only run on commits after they are merged. See the CI behavior manipulation section below to learn how to customize this behavior.
"},{"location":"developers/general/contributing/#merging-approved-changes","title":"Merging approved changes","text":"After review and presubmit checks, PRs should typically be merged using \"squash and merge\".
- The squashed commit summary should match the PR title and the commit description should match the PR body (this is the default behavior). Accordingly, please write these as you would a helpful commit message.
It is assumed that the PR author will merge their change unless they ask someone else to merge it for them (e.g. because they don't have write access yet).
"},{"location":"developers/general/contributing/#obtaining-commit-access","title":"Obtaining commit access","text":"Access to affiliated repositories is divided into three tiers:
Tier Description Team link Triage New project members should typically start here Can be assigned issues Can apply labels to issues / PRs Can run workflows without approval iree-triage Write Established project contributors should request this access Can merge approved pull requests Can create branches iree-write Maintain Can edit repository settings Can push to protected branches iree-maintain All access tiers first require joining the OpenXLA GitHub organization.
Fill out this form to request access
Once you are a member of the OpenXLA GitHub organization, you can request to join any of the teams on https://github.com/orgs/openxla/teams.
Note: other GitHub organizations
Work on IREE sometimes spans other GitHub organizations like iree-org and shark-infra. Reach out to a project member if you would also like access to repositories in those organizations.
"},{"location":"developers/general/contributing/#credits-in-the-authors-file","title":"Credits in the AUTHORS file","text":"If you would like additional recognition for your contributions, you may add yourself or your organization to the AUTHORS file that keeps track of those who have made significant contributions to the project.
- Please add the entity who owns the copyright for your contribution.
- The source control history remains the most accurate source for individual contributions.
"},{"location":"developers/general/contributing/#tips-for-contributors","title":"Tips for contributors","text":""},{"location":"developers/general/contributing/#tool-recommendations","title":"Tool recommendations","text":"Program or tool Description Visual Studio Code (VSCode) The most commonly used editor amongst IREE developers Ccache A fast C/C++ compiler cache. See the CMake with ccache
page GitHub CLI A CLI for interacting with GitHub \"Refined GitHub\" browser extensions Extension that adds features to the GitHub UI"},{"location":"developers/general/contributing/#build-systems","title":"Build systems","text":"IREE supports building from source with both Bazel and CMake.
- CMake is the preferred build system and offers the most flexible configuration options
- Bazel is a stricter build system and helps with usage in Google's downstream source repository
- Certain dependencies (think large/complex projects like CUDA, TensorFlow, PyTorch, etc.) may be difficult to support with one build system or the other, so the project may configure these as optional
"},{"location":"developers/general/contributing/#continuous-integration-ci","title":"Continuous integration (CI)","text":"IREE uses GitHub Actions for CI. The primary CI is configured in the ci.yml workflow file.
"},{"location":"developers/general/contributing/#self-hosted-runners","title":"Self-hosted runners","text":"In addition to the default runners GitHub provides, IREE uses self-hosted runners to run many of its workflow jobs. These enable access to additional compute and custom configurations such as accelerators. Configuration scripting is checked in to this repository (see the README for that directory).
"},{"location":"developers/general/contributing/#custom-managed-runners","title":"Custom managed runners","text":"In addition to our self-hosted runners, we use GitHub's large managed runners for some platforms.
"},{"location":"developers/general/contributing/#ci-behavior-manipulation","title":"CI behavior manipulation","text":"The setup step of the CI determines which CI jobs to run. This is controlled by the configure_ci.py script. It will generally run a pre-determined set of jobs on presubmit with some jobs kept as post-submit only. If changes are only to a certain set of excluded files that we know don't affect CI (e.g. Markdown files), then it will skip the jobs.
You can customize which jobs run using git trailers in the PR description.
The available options are
ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\nskip-ci: free form reason\nskip-llvm-integrate-benchmark: free form reason\nbenchmark-extra: extra,benchmarks,to,run\nrunner-env: [testing|prod]\n
Using skip-ci
skip-ci
skips all jobs. It is mutually exclusive with the other ci-*
options and is synonymous with ci-skip: all
.
skip-ci: free form reason\n
Using ci-skip
, ci-extra
, ci-exactly
The ci-*
options instruct the setup script on which jobs to include or exclude from its run. They take a comma-separated list of jobs which must be from the set of top-level job identifiers in the ci.yml
file or the special keyword \"all\" to indicate all jobs.
ci-skip: jobs,to,skip\nci-extra: extra,jobs,to,run\nci-exactly: exact,set,of,jobs,to,run\n
ci-skip
removes jobs that would otherwise be included, though it is not an error to list jobs that would not be included by default. ci-extra
adds additional jobs that would not have otherwise been run, though it is not an error to list jobs that would have been included anyway. It is an error to list a job in both \"skip\" and \"extra\". ci-exactly
provides an exact list of jobs that should run. It is mutually exclusive with both \"skip\" and \"extra\".
In all these cases, the setup does not make any effort to ensure that job dependencies are satisfied. Thus, if you request skipping the build_all
job, all the jobs that depend on it will fail, not be skipped.
Using benchmark-extra
, skip-llvm-integrate-benchmark
benchmark-extra: extra,benchmarks,to,run\nskip-llvm-integrate-benchmark: free form reason\n
Benchmarks don't run by default on PRs, and must be specifically requested.
The benchmark-extra
option allows specifying additional benchmark presets to run as part of benchmarking. It accepts a comma-separated list of benchmark presets. This combines with labels added to the PR (which are a more limited set of options). See the benchmark suites documentation.
Benchmarks do run by default on PRs detected to be an integration of LLVM into IREE, but this behavior can be disabled with skip-llvm-integrate-benchmark
.
Using runner-env
The runner-env
option controls which runner environment to target for our self-hosted runners. We maintain a test environment to allow testing out new configurations prior to rolling them out. This trailer is for advanced users who are working on the CI infrastructure itself.
runner-env: [testing|prod]\n
"},{"location":"developers/general/contributing/#ci-configuration-recipes","title":"CI configuration recipes","text":"Copy/paste any of these at the bottom of a PR description to change what the CI runs.
-
Also run Windows and macOS builds that are normally post-merge only:
ci-extra: build_test_all_windows,build_test_all_macos_arm64,build_test_all_macos_x86_64\n
-
Also run GPU tests on NVIDIA A100 runners (opt-in due to low availability):
ci-extra: test_a100\n
-
Skip all CI builds and tests, e.g. for comment-only changes:
skip-ci: Comment-only change.\n
-
Only run Bazel builds, e.g. for changes only affecting Bazel rules:
ci-exactly: build_test_all_bazel\n
For example, this PR opted in to running the build_test_all_windows
job:
The enabled jobs can be viewed from the Summary page of an action run:
"},{"location":"developers/general/contributing/#git-workflows","title":"Git workflows","text":"We tend to use the \"triangular\" or \"forking\" workflow. Develop primarily on a clone of the repository on your development machine. Any local branches named the same as persistent branches from the main repository are pristine (though potentially stale) copies. You only fastforward these to match upstream and otherwise do development on other branches. When sending PRs, you push to a different branch on your public fork and create the PR from there.
"},{"location":"developers/general/contributing/#setup","title":"Setup","text":" -
Create a fork of the main repository.
-
Create a local git repository with remotes upstream
(the main repository) and origin
(your personal fork). To list your current remotes git remote -v
.
a. If you already cloned from the main repository (e.g. by following the getting started guide):
# From your existing git repo\n$ git remote rename origin upstream\n$ git remote add origin https://github.com/<github_username>/iree.git\n
b. If you haven't already cloned:
# From whatever directory under which you want to nest your repo\n$ git clone https://github.com/<github_username>/iree.git\n$ cd iree\n$ git remote add upstream https://github.com/openxla/iree.git\n
This is especially important for maintainers who have write access (so can push directly to the main repository) and admins who have elevated privileges (so can push directly to protected branches).
These names are just suggestions, but you might find some scripts where the defaults are for remotes named like this.
For extra safety, you can make it difficult to push directly to upstream by setting the push url to something invalid: git remote set-url --push upstream DISABLE
, which requires re-enabling the push URL explicitly before pushing. You can wrap this behavior in a custom git command like git-sudo.
-
Use a script like git_update.sh to easily synchronize main
with upstream
. Submodules make this a little trickier than it should be. You can also turn this into a git command by adding it to your path as git-update
.
"},{"location":"developers/general/contributing/#git-config","title":"Git config","text":"These are some additional options you could put in your top-level .gitconfig
or repository-specific .git/config
files that are conducive to the recommended workflow
[push]\ndefault = current\n[alias]\n# Delete branches that you pushed and have been deleted upstream, e.g. because\n# the PR was merged.\ngone = ! \"git fetch -p && git for-each-ref --format '%(refname:short) %(upstream:track)' | awk '$2 == \\\"[gone]\\\" {print $1}' | xargs -r git branch -D\"\n# Update from upstream (custom command) and delete obsolete local branches.\nsync = ! (git update main && git gone)\n# Create a new branch based off of main (requires a clean working directory).\nnew = \"!f(){ \\\\\\ngit checkout main && git switch -c $1; \\\\\\n}; f\"\n# Display branches in a useful \"latest last\" format\nbr = for-each-ref --sort=committerdate refs/heads/ --format='%(HEAD) %(color:yellow)%(refname:short)%(color:reset) - %(color:red)%(objectname:short)%(color:reset) - %(contents:subject) (%(color:green)%(committerdate:relative)%(color:reset))'\n# `git git foo` -> `git foo` typo fixer\ngit = \"!f(){ \\\\\\n git \\\"$@\\\"; \\\\\\n}; f\"\n# Get the git root directory\nroot = rev-parse --show-toplevel\n# checkout, but also sync submodules\nch = \"!f() { \\\\\\n git checkout \\\"$@\\\"; git submodule sync && git submodule update --init; \\\\\\n}; f\"\n# See the diff for a PR branch vs the main branch\ndiffmain = diff --merge-base main\n# See only the files that differ vs the main branch\nwhatsout = diffmain --name-only\n[checkout]\n# If the checkout command\ndefaultRemote = origin\n[pull]\n# When pulling, only complete the pull if its a clean fast forward.\nff = only\n[remote]\n# Push to your fork (origin) by default\npushDefault = origin\n[url \"ssh://git@github.com/\"]\n# Pull with https (so no auth required), but push with ssh.\npushInsteadOf = https://github.com/\n
"},{"location":"developers/general/developer-overview/","title":"Developer overview","text":"This guide provides an overview of IREE's project structure and main tools for developers.
"},{"location":"developers/general/developer-overview/#project-code-layout","title":"Project code layout","text":" - /compiler/: MLIR dialects, LLVM compiler passes, module translation code, etc.
- bindings/: Python and other language bindings
- /runtime/: Standalone runtime code including the VM and HAL drivers
- bindings/: Python and other language bindings
- /integrations/: Integrations between IREE and other frameworks, such as TensorFlow
- /tests/: Tests for full compiler->runtime workflows
- /tools/: Developer tools (
iree-compile
, iree-run-module
, etc.) - /samples/: Also see the separate https://github.com/iree-org/iree-samples repository
"},{"location":"developers/general/developer-overview/#iree-compiler-code-layout","title":"IREE compiler code layout","text":" - API/: Public C API
- Codegen/: Code generation for compute kernels
- Dialect/: MLIR dialects (
Flow
, HAL
, Stream
, VM
, etc.) - InputConversion/: Conversions from input dialects and preprocessing
"},{"location":"developers/general/developer-overview/#iree-runtime-code-layout","title":"IREE runtime code layout","text":" - base/: Common types and utilities used throughout the runtime
- hal/: Hardware Abstraction Layer for IREE's runtime, with implementations for hardware and software backends
- schemas/: Data storage format definitions, primarily using FlatBuffers
- task/: System for running tasks across multiple CPU threads
- tooling/: Utilities for tests and developer tools, not suitable for use as-is in downstream applications
- vm/: Bytecode Virtual Machine used to work with IREE modules and invoke IREE functions
"},{"location":"developers/general/developer-overview/#developer-tools","title":"Developer tools","text":"IREE's core compiler accepts programs in supported input MLIR dialects (e.g. stablehlo
, tosa
, linalg
). Import tools and APIs may be used to convert from framework-specific formats like TensorFlow SavedModel to MLIR modules. While programs are ultimately compiled down to modules suitable for running on some combination of IREE's target deployment platforms, IREE's developer tools can run individual compiler passes, translations, and other transformations step by step.
"},{"location":"developers/general/developer-overview/#iree-opt","title":"iree-opt","text":"iree-opt
is a tool for testing IREE's compiler passes. It is similar to mlir-opt and runs sets of IREE's compiler passes on .mlir
input files. See \"conversion\" in MLIR's Glossary for more information. Transformations performed by iree-opt
can range from individual passes performing isolated manipulations to broad pipelines that encompass a sequence of steps.
Test .mlir
files that are checked in typically include a RUN
block at the top of the file that specifies which passes should be performed and if FileCheck
should be used to test the generated output.
Here's an example of a small compiler pass running on a test file:
$ ../iree-build/tools/iree-opt \\\n--split-input-file \\\n--mlir-print-ir-before-all \\\n--iree-util-drop-compiler-hints \\\n$PWD/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir\n
For a more complex example, here's how to run IREE's complete transformation pipeline targeting the VMVX backend on the fullyconnected.mlir model file:
$ ../iree-build/tools/iree-opt \\\n--iree-transformation-pipeline \\\n--iree-hal-target-backends=vmvx \\\n$PWD/tests/e2e/stablehlo_models/fullyconnected.mlir\n
"},{"location":"developers/general/developer-overview/#iree-compile","title":"iree-compile","text":"iree-compile
is IREE's main compiler driver for generating binaries from supported input MLIR assembly.
For example, to translate simple.mlir
to an IREE module:
$ ../iree-build/tools/iree-compile \\\n--iree-hal-target-backends=vmvx \\\n$PWD/samples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_vmvx.vmfb\n
"},{"location":"developers/general/developer-overview/#iree-run-module","title":"iree-run-module","text":"The iree-run-module
program takes an already translated IREE module as input and executes an exported main function using the provided inputs.
This program can be used in sequence with iree-compile
to translate a .mlir
file to an IREE module and then execute it. Here is an example command that executes the simple simple_abs_vmvx.vmfb
compiled from simple_abs.mlir
above on IREE's VMVX driver:
$ ../iree-build/tools/iree-run-module \\\n--module=/tmp/simple_abs_vmvx.vmfb \\\n--device=local-task \\\n--function=abs \\\n--input=f32=-2\n
"},{"location":"developers/general/developer-overview/#iree-check-module","title":"iree-check-module","text":"The iree-check-module
program takes an already translated IREE module as input and executes it as a series of googletest tests. This is the test runner for the IREE check framework.
$ ../iree-build/tools/iree-compile \\\n--iree-input-type=stablehlo \\\n--iree-hal-target-backends=vmvx \\\n$PWD/tests/e2e/xla_ops/abs.mlir \\\n-o /tmp/abs.vmfb\n
$ ../iree-build/tools/iree-check-module \\\n--device=local-task \\\n--module=/tmp/abs.vmfb\n
"},{"location":"developers/general/developer-overview/#iree-run-mlir","title":"iree-run-mlir","text":"The iree-run-mlir
program takes a .mlir
file as input, translates it to an IREE bytecode module, and executes the module.
It is designed for testing and debugging, not production uses, and therefore does some additional work that usually must be explicit, like marking every function as exported by default and running all of them.
For example, to execute the contents of samples/models/simple_abs.mlir:
# iree-run-mlir <compiler flags> [input.mlir] <runtime flags>\n$ ../iree-build/tools/iree-run-mlir \\\n--iree-hal-target-backends=vmvx \\\n$PWD/samples/models/simple_abs.mlir \\\n--input=f32=-2\n
"},{"location":"developers/general/developer-overview/#iree-dump-module","title":"iree-dump-module","text":"The iree-dump-module
program prints the contents of an IREE module FlatBuffer file.
For example, to inspect the module translated above:
../iree-build/tools/iree-dump-module /tmp/simple_abs_vmvx.vmfb\n
"},{"location":"developers/general/developer-overview/#useful-generic-flags","title":"Useful generic flags","text":""},{"location":"developers/general/developer-overview/#read-inputs-from-a-file","title":"Read inputs from a file","text":"All the IREE tools support reading input values from a file. This is quite useful for debugging. Use --help
for each tool to see what the flag to set. The inputs are expected to be newline-separated. Each input should be either a scalar or a buffer. Scalars should be in the format type=value
and buffers should be in the format [shape]xtype=[value]
. For example:
1x5xf32=1,-2,-3,4,-5\n1x5x3x1xf32=15,14,13,12,11,10,9,8,7,6,5,4,3,2,1\n
"},{"location":"developers/general/developer-overview/#-iree-flow-trace-dispatch-tensors","title":"--iree-flow-trace-dispatch-tensors
","text":"This flag will enable tracing inputs and outputs for each dispatch function. It is easier to narrow down test cases, since IREE breaks an ML workload into multiple dispatch functions. When the flag is on, IREE will insert trace points before and after each dispatch function. The first trace op is for inputs, and the second trace op is for outputs. There will be two events for one dispatch function.
"},{"location":"developers/general/developer-tips/","title":"Developer tips and tricks","text":"The IREE compiler is built using MLIR, so it naturally supports the common MLIR debugging workflows. For areas where IREE differentiates itself, this page lists other helpful tips and tricks.
"},{"location":"developers/general/developer-tips/#setting-compiler-options","title":"Setting compiler options","text":"Tools such as iree-compile
take options via command-line flags. Pass --help
to see the full list:
$ iree-compile --help\n\nOVERVIEW: IREE compilation driver\n\nUSAGE: iree-compile [options] <input file or '-' for stdin>\n\nOPTIONS:\n ...\n
Tip - Options and the Python bindings
If you are using the Python bindings, options can be passed via the extra_args=[\"--flag\"]
argument:
import iree.compiler as ireec\n\ninput_mlir = \"\"\"\nfunc.func @abs(%input : tensor<f32>) -> (tensor<f32>) {\n%result = math.absf %input : tensor<f32>\n return %result : tensor<f32>\n}\"\"\"\n\ncompiled_module = ireec.tools.compile_str(\n input_mlir,\n target_backends=[\"llvm-cpu\"],\nextra_args=[\"--mlir-timing\"])\n
"},{"location":"developers/general/developer-tips/#inspecting-vmfb-files","title":"Inspecting .vmfb
files","text":"The IREE compiler generates FlatBuffer files using the .vmfb
file extension, short for \"Virtual Machine FlatBuffer\", which can then be loaded and executed using IREE's runtime.
Info - other output formats The IREE compiler can output different formats with the --output-format=
flag:
Flag value Output --output-format=vm-bytecode
(default) VM Bytecode (.vmfb
) files --output-format=vm-c
C source modules VM Bytecode files are usable across a range of deployment scenarios, while C source modules provide low level connection points for constrained environments like bare metal platforms.
By default, .vmfb
files can be opened as zip files: (1)
- Setting
--iree-vm-emit-polyglot-zip=false
will disable this feature and decrease file size slightly
$ unzip -d simple_abs_cpu ./simple_abs_cpu.vmfb\n\nArchive: ./simple_abs_cpu.vmfb\n extracting: simple_abs_cpu/module.fb\n extracting: simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n
The embedded binary (here an ELF shared object with CPU code) can be parsed by standard tools:
$ readelf -Ws ./simple_abs_cpu/abs_dispatch_0_system_elf_x86_64.so\n\nSymbol table '.dynsym' contains 2 entries:\n Num: Value Size Type Bind Vis Ndx Name\n 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND\n 1: 0000000000001760 17 FUNC GLOBAL DEFAULT 7 iree_hal_executable_library_query\n\nSymbol table '.symtab' contains 42 entries:\n Num: Value Size Type Bind Vis Ndx Name\n 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND\n 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS abs_dispatch_0\n 2: 0000000000001730 34 FUNC LOCAL DEFAULT 7 abs_dispatch_0_generic\n 3: 00000000000034c0 80 OBJECT LOCAL DEFAULT 8 iree_hal_executable_library_query_v0\n 4: 0000000000001780 111 FUNC LOCAL DEFAULT 7 iree_h2f_ieee\n 5: 00000000000017f0 207 FUNC LOCAL DEFAULT 7 iree_f2h_ieee\n ...\n
The iree-dump-module
tool can also be used to see information about a given .vmfb
file:
$ iree-dump-module simple_abs.vmfb\n\n//===---------------------------------------------------------------------===//\n// @module : version 0\n//===---------------------------------------------------------------------===//\n\nRequired Types:\n [ 0] i32\n [ 1] i64\n [ 2] !hal.allocator\n [ 3] !hal.buffer\n ...\n\nModule Dependencies:\n hal, version >= 0, required\n\nImported Functions:\n [ 0] hal.ex.shared_device() -> (!vm.ref<?>)\n [ 1] hal.allocator.allocate(!vm.ref<?>, i32, i32, i64) -> (!vm.ref<?>)\n ...\n\nExported Functions:\n [ 0] abs(!vm.ref<?>) -> (!vm.ref<?>)\n [ 1] __init() -> ()\n\n...\n
"},{"location":"developers/general/developer-tips/#dumping-executable-files","title":"Dumping executable files","text":"The --iree-hal-dump-executable-*
flags instruct the compiler to save files related to \"executable translation\" (code generation for a specific hardware target) into a directory of your choosing. If you are interested in seeing which operations in your input program were fused into a compute kernel or what device code was generated for a given program structure, these flags are a great starting point.
Flag Files dumped iree-hal-dump-executable-files-to
All files (meta-flag) iree-hal-dump-executable-sources-to
Source .mlir
files prior to HAL compilation iree-hal-dump-executable-intermediates-to
Intermediate files (e.g. .o
files, .mlir
stages) iree-hal-dump-executable-binaries-to
Binary files (e.g. .so
, .spv
, .ptx
), as used in the .vmfb
iree-hal-dump-executable-benchmarks-to
Standalone benchmark files for iree-benchmark-module
CPUGPU - VulkanGPU - CUDA $ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-llvmcpu-link-embedded=false \\\n--iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n-o /tmp/iree/simple_abs/simple_abs_cpu.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_system_elf_x86_64_benchmark.mlir\nmodule_abs_dispatch_0_system_elf_x86_64.codegen.bc\nmodule_abs_dispatch_0_system_elf_x86_64.linked.bc\nmodule_abs_dispatch_0_system_elf_x86_64.optimized.bc\nmodule_abs_dispatch_0_system_elf_x86_64.o\nmodule_abs_dispatch_0_system_elf_x86_64.s\nmodule_abs_dispatch_0_system_elf_x86_64.so\nsimple_abs_cpu.vmfb\n
Tip - Embedded and system linking
The default value of --iree-llvmcpu-link-embedded=true
generates embedded ELF files. By disabling that flag, the compiler will produce platform-standard .so
files for Linux, .dll
files for Windows, etc. While embedded ELF files can be smaller and more portable, inspection of artifacts is easier with platform-standard shared object files.
Tip - Disassembling .bc
files with llvm-dis
The .bc
intermediate files use the LLVM BitCode format, which can be disassembled using llvm-dis
:
// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_system_elf_x86_64.codegen.bc\n$ cat module_abs_dispatch_0_system_elf_x86_64.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_system_elf_x86_64.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\ntarget triple = \"x86_64-linux-gnu\"\n\n%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 }\n%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 }\n\n...\n\ndefine internal i32 @abs_dispatch_0_generic(\n ptr noalias nonnull align 16 %0,\n ptr noalias nonnull align 16 %1,\n ptr noalias nonnull align 16 %2) #0 {\n %4 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t %4, 10,\n %6 = load ptr, ptr %5, align 8,\n %7 = ptrtoint ptr %6 to i64,\n %8 = and i64 %7, 63,\n %9 = icmp eq i64 %8, 0,\n call void @llvm.assume(i1 %9),\n %10 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8,\n %11 = extractvalue %iree_hal_executable_dispatch_state_v0_t %10, 10,\n %12 = getelementptr ptr, ptr %11, i32 1,\n %13 = load ptr, ptr %12, align 8,\n %14 = ptrtoint ptr %13 to i64,\n %15 = and i64 %14, 63,\n %16 = icmp eq i64 %15, 0,\n call void @llvm.assume(i1 %16),\n %17 = load float, ptr %6, align 4,\n %18 = call float @llvm.fabs.f32(float %17),\n store float %18, ptr %13, align 4,\n ret i32 0,\n}\n\n...\n
$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n--iree-hal-target-backends=vulkan-spirv \\\n--iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n-o /tmp/iree/simple_abs/simple_abs_vulkan.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb_benchmark.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.mlir\nmodule_abs_dispatch_0_vulkan_spirv_fb.spv\nsimple_abs_vulkan.vmfb\n
Tip - Disassembling .spv
files with spirv-dis
The .spv
files use the SPIR-V binary format, which can be disassembled using spirv-dis
from SPIR-V Tools:
$ cd /tmp/iree/simple_abs/\n$ spirv-dis module_abs_dispatch_0_vulkan_spirv_fb.spv\n\n; SPIR-V\n; Version: 1.0\n; Generator: Khronos; 22\n; Bound: 20\n; Schema: 0\n OpCapability Shader\n OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n %18 = OpExtInstImport \"GLSL.std.450\"\n OpMemoryModel Logical GLSL450\n OpEntryPoint GLCompute %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n OpExecutionMode %abs_dispatch_0_generic LocalSize 1 1 1\n OpName %__resource_var_0_0_ \"__resource_var_0_0_\"\n OpName %__resource_var_0_1_ \"__resource_var_0_1_\"\n OpName %abs_dispatch_0_generic \"abs_dispatch_0_generic\"\n OpDecorate %_arr_float_uint_1 ArrayStride 4\n OpMemberDecorate %_struct_2 0 Offset 0\n OpDecorate %_struct_2 Block\n OpDecorate %__resource_var_0_0_ Binding 0\n OpDecorate %__resource_var_0_0_ DescriptorSet 0\n OpDecorate %__resource_var_0_1_ Binding 1\n OpDecorate %__resource_var_0_1_ DescriptorSet 0\n %float = OpTypeFloat 32\n %uint = OpTypeInt 32 0\n %uint_1 = OpConstant %uint 1\n%_arr_float_uint_1 = OpTypeArray %float %uint_1\n %_struct_2 = OpTypeStruct %_arr_float_uint_1\n%_ptr_StorageBuffer__struct_2 = OpTypePointer StorageBuffer %_struct_2\n%__resource_var_0_0_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n%__resource_var_0_1_ = OpVariable %_ptr_StorageBuffer__struct_2 StorageBuffer\n %void = OpTypeVoid\n %9 = OpTypeFunction %void\n %uint_0 = OpConstant %uint 0\n%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float\n%abs_dispatch_0_generic = OpFunction %void None %9\n %12 = OpLabel\n %15 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_0_ %uint_0 %uint_0\n %16 = OpLoad %float %15\n %17 = OpExtInst %float %18 FAbs %16\n %19 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_1_ %uint_0 %uint_0\n OpStore %19 %17\n OpReturn\n OpFunctionEnd\n
$ mkdir -p /tmp/iree/simple_abs/\n\n$ iree-compile simple_abs.mlir \\\n--iree-hal-target-backends=cuda \\\n--iree-hal-dump-executable-files-to=/tmp/iree/simple_abs \\\n-o /tmp/iree/simple_abs/simple_abs_cuda.vmfb\n\n$ ls /tmp/iree/simple_abs\n\nmodule_abs_dispatch_0_cuda_nvptx_fb_benchmark.mlir\nmodule_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.linked.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.optimized.bc\nmodule_abs_dispatch_0_cuda_nvptx_fb.ptx\nmodule_abs_dispatch_0.mlir\nsimple_abs_cuda.vmfb\n
Tip - Disassembling .bc
files with llvm-dis
The .bc
intermediate files use the LLVM BitCode format, which can be disassembled using llvm-dis
:
// Build `llvm-dis` from source as needed:\n$ cmake --build iree-build/ --target llvm-dis\n$ iree-build/llvm-project/bin/llvm-dis --help\n\n$ cd /tmp/iree/simple_abs/\n$ llvm-dis module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc\n$ cat module_abs_dispatch_0_cuda_nvptx_fb.codegen.ll\n\n; ModuleID = 'module_abs_dispatch_0_cuda_nvptx_fb.codegen.bc'\nsource_filename = \"abs_dispatch_0\"\n\ndeclare ptr @malloc(i64)\n\ndeclare void @free(ptr)\n\ndeclare float @__nv_fabsf(float)\n\ndefine void @abs_dispatch_0_generic(ptr noalias readonly align 16 %0, ptr noalias align 16 %1) {\n %3 = ptrtoint ptr %0 to i64\n %4 = and i64 %3, 63\n %5 = icmp eq i64 %4, 0\n call void @llvm.assume(i1 %5)\n %6 = ptrtoint ptr %1 to i64\n %7 = and i64 %6, 63\n %8 = icmp eq i64 %7, 0\n call void @llvm.assume(i1 %8)\n %9 = load float, ptr %0, align 4\n %10 = call float @__nv_fabsf(float %9)\n store float %10, ptr %1, align 4\n ret void\n}\n\n!nvvm.annotations = !{!0, !1, !2, !3}\n\n!0 = !{ptr @abs_dispatch_0_generic, !\"kernel\", i32 1}\n!1 = !{ptr @abs_dispatch_0_generic, !\"maxntidx\", i32 1}\n!2 = !{ptr @abs_dispatch_0_generic, !\"maxntidy\", i32 1}\n!3 = !{ptr @abs_dispatch_0_generic, !\"maxntidz\", i32 1}\n
"},{"location":"developers/general/developer-tips/#compiling-phase-by-phase","title":"Compiling phase by phase","text":"IREE compiles programs through a series of broad phases:
graph LR\n accTitle: Compilation phases overview\n accDescr: Input to ABI to Flow to Stream to HAL to VM\n\n A([Input])\n A --> B([ABI])\n B --> C([Flow])\n C --> D([Stream])\n D --> E([HAL])\n E --> F([VM])
Tip - available phases These are the phase names available for use with the --compile-to
and --compile-from
flags described below:
Phase name Description input
Performs input processing and lowering into core IREE input dialects (linalg/etc) abi
Adjusts the program ABI for the specified execution environment preprocessing
Applies customizable preprocessing
prior to Flow/Stream/HAL/VM flow
Models execution data flow and partitioning using the flow
dialect stream
Models execution partitioning and scheduling using the stream
dialect executable-sources
Prepares hal
dialect executables for translation, prior to codegen executable-targets
Runs code generation for hal
dialect executables hal
Finishes hal
dialect processing vm
Lowers to IREE's abstract virtual machine using the vm
dialect end
Completes the full compilation pipeline For an accurate list of phases, see the source code or check the help output with a command such as:
iree-compile --help | sed -n '/--compile-to/,/--/p' | head -n -1\n
You can output a program snapshot at intermediate phases with the --compile-to=<phase name>
flag:
$ cat simple_abs.mlir\n\nfunc.func @abs(%input : tensor<f32>) -> (tensor<f32>) {\n %result = math.absf %input : tensor<f32>\n return %result : tensor<f32>\n}\n\n$ iree-compile simple_abs.mlir --compile-to=abi\n\nmodule {\n func.func @abs(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {\n %0 = hal.tensor.import %arg0 \"input 0\" : !hal.buffer_view -> tensor<f32>\n %1 = math.absf %0 : tensor<f32>\n %2 = hal.tensor.export %1 \"output 0\" : tensor<f32> -> !hal.buffer_view\n return %2 : !hal.buffer_view\n }\n}\n
This is similar to the --mlir-print-ir-after=
flag, but at clearly defined pipeline phases.
Compilation can be continued from any intermediate phase. This allows for iterative workflows - compile to a phase, make edits to the .mlir
file, then resume compilation and continue through the pipeline:
$ iree-compile simple_abs.mlir --compile-to=abi -o simple_abs_abi.mlir\n\n$ sed \\\n-e 's/math.absf/math.exp/' \\\n-e 's/@abs/@exp/' \\\nsimple_abs_abi.mlir > simple_exp_abi.mlir\n\n$ iree-compile simple_exp_abi.mlir \\\n--iree-hal-target-backends=llvm-cpu \\\n-o simple_exp_cpu.vmfb\n
or explicitly resume from an intermediate phase with --compile-from=<phase name>
:
$ iree-compile simple_exp_abi.mlir \\\n--iree-hal-target-backends=llvm-cpu \\\n--compile-from=abi \\\n-o simple_exp_cpu.vmfb\n
"},{"location":"developers/general/release-management/","title":"Release management","text":"IREE cuts automated releases via a workflow that is triggered daily. The only constraint placed on the commit that is released is that it has passed all CI checks. These are published on GitHub with the \"pre-release\" status. For debugging this process, see the Release debugging playbook.
We periodically promote one of these candidates to a \"stable\" release by removing the \"pre-release\" status. This makes it show up as a \"latest\" release on GitHub. We also push the Python packages for this release to PyPI.
"},{"location":"developers/general/release-management/#picking-a-candidate-to-promote","title":"Picking a candidate to promote","text":"When selecting a candidate we use the following criteria:
- \u2a864 days old so that problems with it may have been spotted
- Contains no P0 regressions vs the previous stable release
- LLVM submodule commit ideally exists upstream (no cherry picks or patches)
When you've identified a potential candidate, email the iree-discuss list with the proposal and solicit feedback. People may point out known regressions or request that some feature make the cut.
"},{"location":"developers/general/release-management/#promoting-a-candidate-to-stable","title":"Promoting a candidate to stable","text":" -
(Authorized users only) Push to PyPI using pypi_deploy.sh
- For Googlers, the password is stored at http://go/iree-pypi-password
-
Open the release on GitHub. Rename the release from \"candidate\" to \"stable\", uncheck the option for \"pre-release\", and check the option for \"latest\".
"},{"location":"developers/general/testing-guide/","title":"Testing guide","text":"Like the IREE project in general, IREE tests are divided into a few different components and use different tooling depending on the needs of that component.
Test type Test Build system Supported platforms Compiler tests iree_lit_test Bazel/CMake Host Runtime tests iree_cc_test Bazel/CMake Host/Device iree_native_test Bazel/CMake Host/Device iree_hal_cts_test_suite CMake Host/Device Core E2E tests iree_check_test Bazel/CMake Host/Device iree_trace_runner_test Bazel/CMake Host/Device iree_generated_trace_runner_test Bazel/CMake Host/Device iree_static_linker_test CMake Host/Device There are also more *_test_suite
targets that group test targets with the same configuration together.
"},{"location":"developers/general/testing-guide/#compiler-tests","title":"Compiler tests","text":"Tests for the IREE compilation pipeline are written as lit tests in the same style as MLIR.
By convention, IREE includes tests for
- printing and parsing of ops in
.../IR/test/{OP_CATEGORY}_ops.mlir
files - folding and canonicalization in
.../IR/test/{OP_CATEGORY}_folding.mlir
files - compiler passes and pipelines in other
.../test/*.mlir
files
"},{"location":"developers/general/testing-guide/#running-a-test","title":"Running a test","text":"For the test iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir
With CMake, run this from the build directory:
ctest -R iree/compiler/Dialect/VM/Conversion/MathToVM/test/arithmetic_ops.mlir.test\n
With Bazel, run this from the repo root:
bazel test //compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/test:arithmetic_ops.mlir.test\n
"},{"location":"developers/general/testing-guide/#writing-a-test","title":"Writing a test","text":"For advice on writing MLIR compiler tests, see the MLIR testing guide. Tests should be .mlir
files in a test
directory adjacent to the functionality they are testing. Instead of mlir-opt
, use iree-opt
, which registers IREE dialects and passes and doesn't register some unnecessary core ones.
As with most parts of the IREE compiler, these should not have a dependency on the runtime.
"},{"location":"developers/general/testing-guide/#configuring-the-build-system","title":"Configuring the build system","text":"In the Bazel BUILD file, create a iree_lit_test_suite
rule. We usually create a single suite that globs all .mlir
files in the directory and is called \"lit\".
load(\"//iree/build_tools/bazel:iree_lit_test.bzl\", \"iree_lit_test_suite\")\n\niree_lit_test_suite(\n name = \"lit\",\n srcs = glob([\"*.mlir\"]),\n tools = [\n \"@llvm-project//llvm:FileCheck\",\n \"//tools:iree-opt\",\n ],\n)\n
There is a corresponding CMake function, calls to which will be generated by our Bazel to CMake converter.
iree_lit_test_suite(\nNAME\nlit\nSRCS\n\"arithmetic_ops.mlir\"\nDATA\nFileCheck\niree-opt\n)\n
You can also create a test for a single file with iree_lit_test
.
"},{"location":"developers/general/testing-guide/#runtime-tests","title":"Runtime tests","text":"Tests for the runtime C++ code use the GoogleTest testing framework. They should generally follow the style and best practices of that framework.
"},{"location":"developers/general/testing-guide/#running-a-test_1","title":"Running a test","text":"For the test /runtime/src/iree/base/bitfield_test.cc
:
With CMake, run this from the build directory:
ctest -R iree/base/bitfield_test\n
With Bazel, run this from the repo root:
bazel test //runtime/src/iree/base:arena_test\n
"},{"location":"developers/general/testing-guide/#setting-test-environments","title":"Setting test environments","text":"Parallel testing for ctest
can be enabled via the CTEST_PARALLEL_LEVEL
environment variable. For example:
export CTEST_PARALLEL_LEVEL=$(nproc)\n
To use the Vulkan backend as test driver, you may need to select between a Vulkan implementation from SwiftShader and multiple Vulkan-capable hardware devices. This can be done via environment variables. See the generic Vulkan setup page for details regarding these variables.
For Bazel, you can persist the configuration in user.bazelrc
to save typing. For example:
test:vkswiftshader --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vkswiftshader --test_env=\"VK_LAYER_PATH=...\"\ntest:vknative --test_env=\"LD_LIBRARY_PATH=...\"\ntest:vknative --test_env=\"VK_LAYER_PATH=...\"\n
Then you can use bazel test --config=vkswiftshader
to select SwiftShader as the Vulkan implementation. Similarly for other implementations.
"},{"location":"developers/general/testing-guide/#writing-a-test_1","title":"Writing a test","text":"For advice on writing tests in the GoogleTest framework, see the GoogleTest primer. Test files for source file foo.cc
with build target foo
should live in the same directory with source file foo_test.cc
and build target foo_test
. You should #include
iree/testing/gtest.h
instead of any of the gtest or gmock headers.
As with all parts of the IREE runtime, these should not have a dependency on the compiler.
"},{"location":"developers/general/testing-guide/#configuring-the-build-system_1","title":"Configuring the build system","text":"In the Bazel BUILD file, create a cc_test
target with your test file as the source and any necessary dependencies. Usually, you can link in a standard gtest main function. Use iree/testing:gtest_main
instead of the gtest_main
that comes with gtest.
cc_test(\n name = \"arena_test\",\n srcs = [\"arena_test.cc\"],\n deps = [\n \":arena\",\n \"//iree/testing:gtest_main\",\n ],\n)\n
We have created a corresponding CMake function iree_cc_test
that mirrors the Bazel rule's behavior. Our Bazel to CMake converter should generally derive the CMakeLists.txt
file from the BUILD file:
iree_cc_test(\nNAME\narena_test\nSRCS\n\"arena_test.cc\"\nDEPS\n::arena\niree::testing::gtest_main\n)\n
There are other more specific test targets, such as iree_hal_cts_test_suite
, which are designed to test specific runtime support with template configuration and are not supported by Bazel rules.
"},{"location":"developers/general/testing-guide/#iree-core-end-to-end-e2e-tests","title":"IREE core end-to-end (e2e) tests","text":"Here \"end-to-end\" means from the input accepted by the IREE core compiler (dialects like TOSA, StableHLO, Linalg) to execution using the IREE runtime components. It does not include tests of the integrations with ML frameworks (e.g. TensorFlow, PyTorch) or bindings to other languages (e.g. Python).
We avoid using the more traditional lit
tests used elsewhere in the compiler for runtime execution tests. Lit tests require running the compiler tools on the test platform through shell or python scripts that act on files from a local file system. On platforms like Android, the web, and embedded systems, each of these features is either not available or is severely limited.
Instead, to test these flows we use a custom framework called check
. The check framework compiles test programs on the host machine into standalone test binary files that can be pushed to test devices (such as Android phones) where they run with gtest style assertions (e.g. check.expect_almost_eq(lhs, rhs)
).
"},{"location":"developers/general/testing-guide/#building-e2e-tests","title":"Building e2e tests","text":"The files needed by these tests are not built by default with CMake. You'll need to build the special iree-test-deps
target to generate test files prior to running CTest (from the build directory):
cmake --build . --target iree-test-deps\n
To run e2e model tests in generated_e2e_model_tests.cmake, because of their dependencies, -DIREE_BUILD_E2E_TEST_ARTIFACTS=ON
needs to be set when configuring CMake. Also see IREE Benchmark Suite Prerequisites for required packages.
"},{"location":"developers/general/testing-guide/#running-a-test_2","title":"Running a Test","text":"For the test tests/e2e/xla_ops/floor.mlir
compiled for the VMVX target backend and running on the VMVX driver (here they match exactly, but in principle there's a many-to-many mapping from backends to drivers).
With CMake, run this from the build directory:
ctest -R tests/e2e/xla_ops/check_vmvx_local-task_floor.mlir\n
With Bazel, run this from the repo root:
bazel test tests/e2e/xla_ops:check_vmvx_local-task_floor.mlir\n
"},{"location":"developers/general/testing-guide/#setting-test-environments_1","title":"Setting test environments","text":"Similarly, you can use environment variables to select Vulkan implementations for running tests as explained in the Runtime tests section.
"},{"location":"developers/general/testing-guide/#writing-a-test_2","title":"Writing a test","text":"These tests live in tests/e2e
. A single test consists of a .mlir
source file specifying an IREE module where each exported function takes no inputs and returns no results and corresponds to a single test case.
As an example, here are some tests for the MHLO floor operation:
func.func @tensor() {\n %input = util.unfoldable_constant dense<[0.0, 1.1, 2.5, 4.9]> : tensor<4xf32>\n %result = \"mhlo.floor\"(%input) : (tensor<4xf32>) -> tensor<4xf32>\n check.expect_almost_eq_const(%result, dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf32>): tensor<4xf32>\n return\n}\n\nfunc.func @scalar() {\n %input = util.unfoldable_constant dense<101.3> : tensor<f32>\n %result = \"mhlo.floor\"(%input) : (tensor<f32>) -> tensor<f32>\n check.expect_almost_eq_const(%result, dense<101.0> : tensor<f32>): tensor<f32>\n return\n}\n\nfunc.func @negative() {\n %input = util.unfoldable_constant dense<-1.1> : tensor<f32>\n %result = \"mhlo.floor\"(%input) : (tensor<f32>) -> tensor<f32>\n check.expect_almost_eq_const(%result, dense<-2.0> : tensor<f32>): tensor<f32>\n return\n}\n
Test cases are created in gtest for each public function exported by the module.
Note the use of util.unfoldable_constant
to specify test constants. If we were to use a regular constant the compiler would fold away everything at compile time and our test would not actually test the runtime. unfoldable_constant
adds a barrier that prevents folding. To prevent folding/constant propagate on an arbitrary SSA-value you can use util.optimization_barrier
.
Next we use this input constant to exercise the runtime feature under test (in this case, just a single floor operation). Finally, we use a check dialect operation to make an assertion about the output. There are a few different assertion operations. Here we use the expect_almost_eq_const
op: almost because we are comparing floats and want to allow for floating-point imprecision, and const because we want to compare it to a constant value. This last part is just syntactic sugar around
%expected = arith.constant dense<101.0> : tensor<f32>\ncheck.expect_almost_eq(%result, %expected) : tensor<f32>\n
The output of running this test looks like:
[==========] Running 4 tests from 1 test suite.\n[----------] Global test environment set-up.\n[----------] 4 tests from module\n[ RUN ] module.tensor\n[ OK ] module.tensor (76 ms)\n[ RUN ] module.scalar\n[ OK ] module.scalar (79 ms)\n[ RUN ] module.double\n[ OK ] module.double (55 ms)\n[ RUN ] module.negative\n[ OK ] module.negative (54 ms)\n[----------] 4 tests from module (264 ms total)\n\n[----------] Global test environment tear-down\n[==========] 4 tests from 1 test suite ran. (264 ms total)\n[ PASSED ] 4 tests.\n
The \"module\" name for the test suite comes from the default name for an implicit MLIR module. To give the test suite a more descriptive name, use an explicitly named top-level module in this file.
"},{"location":"developers/general/testing-guide/#configuring-the-build-system_2","title":"Configuring the build system","text":"A single .mlir
source file can be turned into a test target with the iree_check_test
Bazel macro (and corresponding CMake function).
load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test\")\n\niree_check_test(\n name = \"check_vmvx_local-task_floor.mlir\",\n src = \"floor.mlir\",\n driver = \"local-task\",\n target_backend = \"vmvx\",\n)\n
The target naming convention is \"check_backend_driver_src\". The generated test will automatically be tagged with a \"driver=vmvx\" tag, which can help filter tests by backend (especially when many tests are generated, as below).
Usually we want to create a suite of tests across many backends and drivers. This can be accomplished with additional macros. For a single backend/driver pair:
load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_single_backend_test_suite\")\n\niree_check_single_backend_test_suite(\n name = \"check_vmvx_local-task\",\n srcs = glob([\"*.mlir\"]),\n driver = \"local-task\",\n target_backend = \"vmvx\",\n)\n
This will generate a separate test target for each file in srcs
with a name following the convention above as well as a Bazel test_suite called \"check_vmvx_local-task\" that will run all the generated tests.
You can also generate suites across multiple pairs:
load(\"//build_tools/bazel:iree_check_test.bzl\", \"iree_check_test_suite\")\n\niree_check_test_suite(\n name = \"check\",\n srcs = [\"success.mlir\"],\n # Leave this argument off to run on all supported backend/driver pairs.\n target_backends_and_drivers = [\n (\"vmvx\", \"local-task\"),\n (\"vulkan-spirv\", \"vulkan\"),\n ],\n)\n
This will create a test per source file and backend/driver pair, a test suite per backend/driver pair, and a test suite, \"check\", that will run all the tests.
The CMake functions follow a similar pattern. The calls to them are generated in our CMakeLists.txt
file by bazel_to_cmake.
There are other test targets that generate tests based on template configuration and platform detection, such as iree_static_linker_test
. Those targets are not supported by Bazel rules at this point.
"},{"location":"developers/performance/benchmark-suites/","title":"Benchmark suites","text":"IREE Benchmarks Suites is a collection of benchmarks for IREE developers to track performance improvements/regressions during development.
The benchmark suites are run for each commit on the main branch and the results are uploaded to https://perf.iree.dev for regression analysis (for the current supported targets). On pull requests, users can add labels benchmarks:*
to trigger the benchmark runs. The results will be compared with https://perf.iree.dev and posted in the comments.
Information about the definitions of the benchmark suites can be found in the IREE Benchmark Suites Configurations.
"},{"location":"developers/performance/benchmark-suites/#running-benchmark-suites-locally","title":"Running benchmark suites locally","text":""},{"location":"developers/performance/benchmark-suites/#prerequisites","title":"Prerequisites","text":"Install iree-import-tf
and iree-import-tflite
in your Python environment (see Tensorflow Integration and TFLite Integration).
"},{"location":"developers/performance/benchmark-suites/#choose-benchmark-presets","title":"Choose benchmark presets","text":"IREE Benchmark Suites contain many benchmarks for different devices and model sizes, which can take a lot of space and time to build. So benchmarks are grouped into presets to allow building and running only a subset of them. The available presets are:
Execution benchmarks:
android-cpu
: benchmarks for mobile CPUs android-gpu
: benchmarks for mobile GPUs cuda
: benchmarks for CUDA with a small model set cuda-large
: benchmarks for CUDA with a large model set vulkan-nvidia
: benchmarks for Vulkan on NVIDIA graphics cards x86_64
: benchmarks for x86_64 CPUs with a small model set x86_64-large
: benchmarks for x86_64 with a large model set
Compilation benchmarks (to collect compilation statistics, such as module sizes):
comp-stats
: compilation benchmarks with a small model set comp-stats-large
: compilation benchmark with a large model set
Note that *-large
presets will download and build a few hundreds GBs of artifacts.
Set the environment variables of benchmark presets for the steps below, for example:
export EXECUTION_BENCHMARK_PRESETS=\"cuda,x86_64\"\nexport COMPILATION_BENCHMARK_PRESETS=\"comp-stats\"\n
"},{"location":"developers/performance/benchmark-suites/#build-benchmark-suites","title":"Build benchmark suites","text":"Configure IREE with -DIREE_BUILD_E2E_TEST_ARTIFACTS=ON
:
cmake -GNinja -B \"${IREE_BUILD_DIR?}\" -S \"${IREE_REPO?}\" \\\n-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\n-DCMAKE_C_COMPILER=clang \\\n-DCMAKE_CXX_COMPILER=clang++ \\\n-DIREE_ENABLE_LLD=ON \\\n-DIREE_BUILD_E2E_TEST_ARTIFACTS=ON\n
If you only need the imported MLIR models:
cmake --build \"${IREE_BUILD_DIR?}\" --target \\\niree-benchmark-import-models\n # For large benchmarks (this will take > 100G disk space)\n# iree-benchmark-import-models-large\n
Otherwise, compile the benchmark suites and tools for benchmarking:
cmake --build \"${IREE_BUILD_DIR?}\" --target \\\niree-benchmark-suites \\\n# If any *-large preset is enabled, also build this target:\n# iree-benchmark-suites-large \\\niree-benchmark-module\nexport E2E_TEST_ARTIFACTS_DIR=\"${IREE_BUILD_DIR?}/e2e_test_artifacts\"\n
TODO(#13683): Each preset should have its own target to further reduce unnecessary builds
"},{"location":"developers/performance/benchmark-suites/#run-benchmarks","title":"Run benchmarks","text":"Export the execution benchmark config:
build_tools/benchmarks/export_benchmark_config.py execution \\\n--benchmark_presets=\"${EXECUTION_BENCHMARK_PRESETS?}\" \\\n> \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n
Run benchmarks (currently only support running on a Linux host):
build_tools/benchmarks/run_benchmarks_on_linux.py \\\n--normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n--target_device_name=\"<target_device_name, e.g. c2-standard-16>\" \\\n--output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n--verbose \\\n--cpu_uarch=\"<host CPU uarch, e.g. CascadeLake>\"\n# Traces can be collected by adding:\n# --traced_benchmark_tool_dir=\"${IREE_TRACED_BUILD_DIR?}/tools\" \\\n# --trace_capture_tool=/path/to/iree-tracy-capture \\\n# --capture_tarball=captured_tracy_files.tar.gz\n
Note that:
<target_device_name>
selects a benchmark group that targets a specific device: - Common options:
c2-standard-16
for x86_64 CPU benchmarks. a2-highgpu-1g
for NVIDIA GPU benchmarks.
- All device names are defined under build_tools/python/e2e_test_framework/device_specs.
- To run x86_64 benchmarks, right now
--cpu_uarch
needs to be provided and only CascadeLake
is available. - To build traced benchmark tools, see Profiling with Tracy.
Filters can be used to select the benchmarks:
build_tools/benchmarks/run_benchmarks_on_linux.py \\\n--normal_benchmark_tool_dir=\"${IREE_BUILD_DIR?}/tools\" \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n--target_device_name=\"c2-standard-16\" \\\n--output=\"${E2E_TEST_ARTIFACTS_DIR?}/benchmark_results.json\" \\\n--verbose \\\n--cpu_uarch=\"CascadeLake\" \\\n--model_name_regex=\"MobileBert*\" \\\n--driver_filter_regex='local-task' \\\n--mode_regex=\"4-thread\"\n
"},{"location":"developers/performance/benchmark-suites/#generate-compilation-statistics-compilation-benchmarks","title":"Generate compilation statistics (compilation benchmarks)","text":"Export the compilation benchmark config:
build_tools/benchmarks/export_benchmark_config.py compilation \\\n--benchmark_presets=\"${COMPILATION_BENCHMARK_PRESETS?}\" \\\n> \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n
Generate the compilation statistics:
build_tools/benchmarks/collect_compilation_statistics.py \\\n--compilation_benchmark_config=comp_config.json \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--build_log=\"${IREE_BUILD_DIR?}/.ninja_log\" \\\n--output=\"${E2E_TEST_ARTIFACTS_DIR?}/compile_stats_results.json\"\n
Note that you need to use Ninja to build the benchmark suites as the tool collects information from its build log.
"},{"location":"developers/performance/benchmark-suites/#show-execution-compilation-benchmark-results","title":"Show execution / compilation benchmark results","text":"If you want to generate a comparison report locally, you can use diff_local_benchmarks.py script to compare two result json files and generate the report. For example:
build_tools/benchmarks/diff_local_benchmarks.py \\\n--base \"${E2E_TEST_ARTIFACTS_DIR?}/before_benchmark_results.json\" \\\n--target \"${E2E_TEST_ARTIFACTS_DIR?}/after_benchmark_results.json\" \\\n> report.md\n
An example that compares compilation statistics:
build_tools/benchmarks/diff_local_benchmarks.py \\\n--base-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/before_compile_stats_results.json\" \\\n--target-compile-stats \"${E2E_TEST_ARTIFACTS_DIR?}/after_compile_stats_results.json\" \\\n> report.md\n
"},{"location":"developers/performance/benchmark-suites/#find-compile-and-run-commands-to-reproduce-benchmarks","title":"Find compile and run commands to reproduce benchmarks","text":"Each benchmark has its benchmark ID in the benchmark suites, you will see a benchmark ID at:
- In the URL of a benchmark series on https://perf.iree.dev
- Execution benchmark:
https://perf.iree.dev/serie?IREE?<benchmark_id>
- Compilation benchmark:
https://perf.iree.dev/serie?IREE?<benchmark_id>-<metric_id>
- In
benchmark_results.json
and compile_stats_results.json
- Execution benchmark result has a field
run_config_id
- Compilation benchmark result has a field
gen_config_id
- In PR benchmark summary or the markdown generated by
diff_local_benchmarks.py
, each benchmark has the link to its https://perf.iree.dev URL, which includes the benchmark ID.
If you don't have artifacts locally, see Fetching Benchmark Artifacts from CI to find the GCS directory of the CI artifacts. Then fetch the needed files:
# Get ${E2E_TEST_ARTIFACTS_DIR_URL} from \"Fetching Benchmark Artifacts from CI\".\nexport E2E_TEST_ARTIFACTS_DIR=\"e2e_test_artifacts\"\n\n# Download all artifacts\nmkdir \"${E2E_TEST_ARTIFACTS_DIR?}\"\ngcloud storage cp -r \"${E2E_TEST_ARTIFACTS_DIR_URL?}\" \"${E2E_TEST_ARTIFACTS_DIR?}\"\n
Run the helper tool to dump benchmark commands from benchmark configs:
build_tools/benchmarks/benchmark_helper.py dump-cmds \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/execution-benchmark-config.json\" \\\n--compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/compilation-benchmark-config.json\" \\\n--e2e_test_artifacts_dir=\"${E2E_TEST_ARTIFACTS_DIR?}\" \\\n--benchmark_id=\"<benchmark_id>\"\n
"},{"location":"developers/performance/benchmark-suites/#get-full-list-of-benchmarks","title":"Get full list of benchmarks","text":"The commands below output the full list of execution and compilation benchmarks, including the benchmark names and their flags:
build_tools/benchmarks/export_benchmark_config.py execution > \"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\nbuild_tools/benchmarks/export_benchmark_config.py compilation > \"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\nbuild_tools/benchmarks/benchmark_helper.py dump-cmds \\\n--execution_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\" \\\n--compilation_benchmark_config=\"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n
"},{"location":"developers/performance/benchmark-suites/#fetching-benchmark-artifacts-from-ci","title":"Fetching benchmark artifacts from CI","text":""},{"location":"developers/performance/benchmark-suites/#1-find-the-corresponding-ci-workflow-run","title":"1. Find the corresponding CI workflow run","text":"On the commit of the benchmark run, you can find the list of the workflow jobs by clicking the green check mark. Click any job that starts with CI /
:
"},{"location":"developers/performance/benchmark-suites/#2-get-urls-of-gcs-artifacts","title":"2. Get URLs of GCS artifacts","text":"On the CI page, click Summary
on the top-left to open the summary page. Scroll down and the links to artifacts are listed in a section titled \"Artifact Links\". Paste the content in your shell to define all needed variables for the following steps:
"},{"location":"developers/performance/benchmark-suites/#3-fetch-the-benchmark-artifacts","title":"3. Fetch the benchmark artifacts","text":"To fetch files from the GCS URL, the gcloud CLI tool (https://cloud.google.com/sdk/docs/install) can list the directory contents and download files (see https://cloud.google.com/sdk/gcloud/reference/storage for more usages). If you want to use CI artifacts to reproduce benchmarks locally, see Find Compile and Run Commands to Reproduce Benchmarks.
Assume you get the GCS URL variables from Get URLs of GCS artifacts.
Download artifacts:
# The GCS directory has the same structure as your local ${IREE_BUILD_DIR?}/e2e_test_artifacts.\ngcloud storage ls \"${E2E_TEST_ARTIFACTS_DIR_URL?}\"\n\n# Download all source and imported MLIR files:\ngcloud storage cp \"${E2E_TEST_ARTIFACTS_DIR_URL?}/*.mlir\" \"<target_dir>\"\n
Execution and compilation benchmark configs can be downloaded at:
# Execution benchmark config:\ngcloud storage cp \\\n\"${E2E_TEST_ARTIFACTS_DIR_URL?}/execution-benchmark-config.json\" \\\n\"${E2E_TEST_ARTIFACTS_DIR?}/exec_config.json\"\n\n# Compilation benchmark config:\ngcloud storage cp \\\n\"${E2E_TEST_ARTIFACTS_DIR_URL?}/compilation-benchmark-config.json\" \\\n\"${E2E_TEST_ARTIFACTS_DIR?}/comp_config.json\"\n
Benchmark raw results and traces can be downloaded at:
# Execution benchmark raw results\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-results-*.json\" .\n\n# Optional: Merge raw results into a single file\nbuild_tools/benchmarks/benchmark_helper.py merge-results benchmark-results-*.json > benchmark_results.json\n\n# Execution benchmark traces\ngcloud storage cp \"${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-traces-*.tar.gz\" .\n\n# Compilation benchmark results\ngcloud storage cp \"${COMPILATION_BENCHMARK_RESULTS_URL?}\" .\n
"},{"location":"developers/performance/benchmarking/","title":"Benchmarking","text":"IREE uses benchmarks to inspect performance at varying levels of granularity. Benchmarking is implemented using the Google Benchmark library. To understand performance details and guide optimization, please refer to the IREE profiling documentation.
"},{"location":"developers/performance/benchmarking/#module-benchmarks","title":"Module Benchmarks","text":"iree-benchmark-module
is a program accepting (almost) the same inputs as iree-run-module
that will benchmark the invocation of a single entry function. It measures timing for the whole process of invoking a function through the VM, including allocating and freeing output buffers. This is a high-level benchmark of an entire invocation flow. It provides a big picture view, but depends on many different variables, like an integration test. For finer-grained measurements more akin to unit tests, see Executable Benchmarks.
To use iree-benchmark-module
, generate an IREE module for the target backend:
$ bazel run //tools:iree-compile -- \\\n--iree-hal-target-backends=vmvx \\\n$PWD/samples/models/simple_abs.mlir \\\n-o /tmp/module.fb\n
and then benchmark an exported function in that module:
$ bazel run //tools:iree-benchmark-module -- \\\n--module=/tmp/module.fb \\\n--device=local-task \\\n--function=abs \\\n--input=f32=-2\n
You'll see output like
Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n L1 Data 32K (x6)\nL1 Instruction 32K (x6)\nL2 Unified 1024K (x6)\nL3 Unified 8448K (x1)\nLoad Average: 2.21, 1.93, 3.34\n***WARNING*** CPU scaling is enabled, the benchmark real time measurements may\n be noisy and will incur extra overhead.\n***WARNING*** Library was built as DEBUG. Timings may be affected.\n------------------------------------------------------------------------------\nBenchmark Time CPU Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time 0.22 ms 0.23 ms 3356\n
Notice that there are a few warnings in there (you may not see all of these). The benchmark library helpfully warns about some common issues that will affect benchmark timing. When trying to obtain real benchmark numbers, you should generally build an optimized build (-c opt
in Bazel) and disable CPU scaling.
bazel build -c opt //tools:iree-benchmark-module\n
Another thing to consider is that depending on where you are running the benchmark you might want to avoid additional programs running at the same time. Bazel itself runs a server even when it's not being actively invoked that can be quite a memory hog, so we'll instead invoke the binary directly. Use your favorite process manager (e.g. htop or pkill on Linux) to kill heavy-weight programs such as Chrome and Bazel.
Now we'll actually invoke the binary:
$ ./bazel-bin/tools/iree-benchmark-module \\\n--module=/tmp/module.fb \\\n--device=local-task \\\n--function=abs \\\n--input=f32=-2\n
Run on (12 X 4500 MHz CPU s)\nCPU Caches:\n L1 Data 32K (x6)\nL1 Instruction 32K (x6)\nL2 Unified 1024K (x6)\nL3 Unified 8448K (x1)\nLoad Average: 1.49, 3.42, 3.49\n------------------------------------------------------------------------------\nBenchmark Time CPU Iterations\n------------------------------------------------------------------------------\nBM_RunModule/process_time/real_time 0.011 ms 0.014 ms 61654\n
Remember to restore CPU scaling when you're done.
"},{"location":"developers/performance/benchmarking/#executable-benchmarks","title":"Executable Benchmarks","text":"We also benchmark the performance of individual parts of the IREE system in isolation. IREE breaks a model down to dispatch functions. To benchmark all the dispatch functions, generate an IREE module with the -iree-flow-export-benchmark-funcs
flag set:
$ build/tools/iree-compile \\\n--iree-input-type=stablehlo \\\n--iree-flow-export-benchmark-funcs \\\n--iree-hal-target-backends=vmvx \\\ntests/e2e/stablehlo_models/fullyconnected.mlir \\\n-o /tmp/fullyconnected.vmfb\n
and then benchmark all exported dispatch functions (and all exported functions) in that module:
$ build/tools/iree-benchmark-module\n --module=/tmp/fullyconnected.vmfb\n --device=local-task\n
If no entry_function
is specified, iree-benchmark-module
will register a benchmark for each exported function that takes no inputs.
You will see output like:
Run on (72 X 3700 MHz CPU s)\nCPU Caches:\n L1 Data 32 KiB (x36)\nL1 Instruction 32 KiB (x36)\nL2 Unified 1024 KiB (x36)\nL3 Unified 25344 KiB (x2)\nLoad Average: 4.39, 5.72, 6.76\n---------------------------------------------------------------------------------------------\nBenchmark Time CPU Iterations\n---------------------------------------------------------------------------------------------\nBM_main_ex_dispatch_0_benchmark/process_time/real_time 0.030 ms 0.037 ms 34065\nBM_main_ex_dispatch_1_benchmark/process_time/real_time 0.034 ms 0.042 ms 20567\nBM_main_ex_dispatch_2_benchmark/process_time/real_time 0.043 ms 0.051 ms 18576\nBM_main_ex_dispatch_3_benchmark/process_time/real_time 0.029 ms 0.036 ms 21345\nBM_main_ex_dispatch_4_benchmark/process_time/real_time 0.042 ms 0.051 ms 15880\nBM_main_ex_dispatch_5_benchmark/process_time/real_time 0.030 ms 0.037 ms 17854\nBM_main_ex_dispatch_6_benchmark/process_time/real_time 0.043 ms 0.052 ms 14919\nBM_main_benchmark/process_time/real_time 0.099 ms 0.107 ms 5892\n
"},{"location":"developers/performance/benchmarking/#bytecode-module-benchmarks","title":"Bytecode Module Benchmarks","text":"Normally, the IREE VM is expected to be integrated into applications and driving model execution. So its performance is of crucial importance. We strive to introduce as little overhead as possible and have several benchmark binaries dedicated for evaluating the VM's performance. These benchmark binaries are named as *_benchmark
in the iree/vm/
directory. They also use the Google Benchmark library, as described above.
"},{"location":"developers/performance/benchmarking/#cpu-configuration","title":"CPU Configuration","text":"When benchmarking, it's important to consider the configuration of your CPUs. Most notably, CPU scaling can give variable results, so you'll usually want to disable it. This can get pretty complex, but the most basic thing to do is to run all CPUs at maximum frequency. The other thing to consider is what CPU(s) your program is running on. Both of these get more complicated on mobile and in multithreaded workloads.
"},{"location":"developers/performance/benchmarking/#linux","title":"Linux","text":"Google benchmark provides some instructions. Note that the library will print \"CPU scaling is enabled\" warnings for any configuration that doesn't have the scaling governor set to performance. Similarly the CPU frequency it reports is the maximum frequency of cpu0, not the frequency of the processor it's actually running on. This means that more advanced configurations should ignore these messages.
Turn off CPU scaling before benchmarking.
sudo cpupower frequency-set --governor performance\n
Restore CPU scaling after benchmarking:
sudo cpupower frequency-set --governor powersave\n
To learn more about different scaling governor settings, see https://www.kernel.org/doc/Documentation/cpu-freq/governors.txt. To restrict which CPUs you run on, use the taskset
command which takes a hexadecimal mask.
To only run on the lowest-numbered CPU you can run
taskset 1 sleep 20 &\n
You can confirm that the process is running on the given CPU:
ps -o psr $!\n
Note that $!
indicates the process ID of the last executed background command, so you can only use this shorthand if you didn't run any commands after the sleep. For more info on taskset, see https://linux.die.net/man/1/taskset.
"},{"location":"developers/performance/benchmarking/#android","title":"Android","text":"Read and understand the Linux instructions first.
Android doesn't give us quite as nice tooling, but the principle is basically the same. One important difference is that thermal throttling is a much bigger concern on mobile. Without a cooling plate, it is likely that high clock speeds will overheat the device and engage thermal throttling, which will ignore whatever clock speeds you may have set to prevent things from catching on fire. Therefore the naive approach above is likely not a good idea.
You will likely need to be root (use su
or adb root
). The commands will depend on your exact phone and number of cores. First play around and make sure you understand what everything means. Note that each CPU has its own files which are used to control its behavior, but changes to a single CPU will sometimes affect others (see /sys/devices/system/cpu/cpu0/cpufreq/affected_cpus
).
Some useful files:
/proc/cpuinfo\n/sys/devices/system/cpu/possible\n/sys/devices/system/cpu/present\n/sys/devices/system/cpu/cpu0/online\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq\n/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq\n/sys/devices/system/cpu/cpu0/cpufreq/affected_cpus\n/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed\n
See the clockspeed of each CPU
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\npaste \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_cur_freq\" \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_min_freq\" \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/cpuinfo_max_freq\"; \\\ndone\n
Before changing things, make sure to check the current scaling governor settings first so you can put them back when you're done.
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\ncat \"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n
"},{"location":"developers/performance/benchmarking/#single-core-example","title":"Single-Core Example","text":"Here's an example to run IREE in a single-threaded context on CPU 7 at its lowest clock speed.
First we'll take control of the clockspeed by setting the governor to \"userspace\".
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\necho userspace > \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n
We can now set individual clock speeds. We'll pin cpu7 to its minimum frequency. We choose the minimum instead of the maximum here to mitigate thermal throttling concerns
$ cat /sys/devices/system/cpu/cpu7/cpufreq/cpuinfo_min_freq > \\\n/sys/devices/system/cpu/cpu7/cpufreq/scaling_setspeed\n
We can confirm the frequencies of all the CPUs by running the same command above. Now to run a command specifically on cpu7, use taskset 80
(hex for 10000000):
taskset 80 sleep 20 &\nps -o psr $!\n
Remember to clean up when you're done! Here we'll set the scaling governor back to schedutil because that's what it was before on the particular device this was tested on, but that governor may not exist on all devices.
$ for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \\\necho schedutil > \\\n\"/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor\"; \\\ndone\n
"},{"location":"developers/performance/benchmarking/#android-scripts","title":"Android Scripts","text":"We provide a few scripts to set clockspeeds on Android (under build_tools/benchmarks
). These are somewhat device-specific:
- The
set_android_scaling_governor.sh
works on all CPUs, but the default governor name may be different across devices. - The
set_*_gpu_scaling_policy.sh
script used should match the actual GPU on your device.
Sample configuration steps for Pixel 6:
- Copy all scripts to the device:
adb push build_tools/benchmarks/*.sh /data/local/tmp\n
- Launch interactive adb shell as super user:
adb shell\noriole:/ # su\noriole:/ # cd /data/local/tmp\n
- Pin frequencies (high clockspeeds):
oriole:/ # ./set_android_scaling_governor.sh\nCPU info (before changing governor):\n cpu governor cur min max\n ------------------------------------------------\n cpu0 sched_pixel 1098000 300000 1803000\ncpu1 sched_pixel 1598000 300000 1803000\ncpu2 sched_pixel 1598000 300000 1803000\ncpu3 sched_pixel 1098000 300000 1803000\ncpu4 sched_pixel 400000 400000 2253000\ncpu5 sched_pixel 400000 400000 2253000\ncpu6 sched_pixel 500000 500000 2802000\ncpu7 sched_pixel 500000 500000 2802000\nSetting CPU frequency governor to performance\n CPU info (after changing governor):\n cpu governor cur min max\n ------------------------------------------------\n cpu0 performance 1803000 300000 1803000\ncpu1 performance 1803000 300000 1803000\ncpu2 performance 1803000 300000 1803000\ncpu3 performance 1803000 300000 1803000\ncpu4 performance 2253000 400000 2253000\ncpu5 performance 2253000 400000 2253000\ncpu6 performance 2802000 500000 2802000\ncpu7 performance 2802000 500000 2802000\noriole:/data/local/tmp # ./set_pixel6_gpu_scaling_policy.sh\nGPU info (before changing frequency scaling policy):\n policy cur min max\n --------------------------------------------------------------\n coarse_demand [adaptive] always_on 251000 151000 848000\nSetting GPU frequency scaling policy to performance\n GPU info (after changing frequency scaling policy):\n policy cur min max\n --------------------------------------------------------------\n coarse_demand adaptive [always_on] 848000 151000 848000\n
- Restore default frequencies:
oriole:/ # ./set_android_scaling_governor.sh sched_pixel\n...\noriole:/ # ./set_pixel6_gpu_scaling_policy.sh default\n...\n
TODO(scotttodd): Windows instructions
"},{"location":"developers/performance/profiling-cpu-events/","title":"Profiling CPUs","text":"CPUs are able to record certain events that may be relevant when investigating the performance of a program. A common example of such an event is a \"cache miss\", when the program tries to access data in memory that isn't already in some CPU cache, causing that access to be slower than it could otherwise be.
Querying and analyzing this data can be useful, but is hard in two distinct ways:
- Depending on the CPU and on the OS, both hardware and software limitations can get in the way of obtaining accurate data.
- This data tends to be inherently difficult to interpret, even when it is perfectly accurate. In practice it is often noisy and inaccurate, which makes interpretation even more complicated.
There are two parts to this page: platform-specific information about how to query this data, and, at the end, a platform-independent explanation of how to interpret it.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#perf-and-simpleperf-on-linux-and-android","title":"Perf and Simpleperf, on Linux and Android","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#overview","title":"Overview","text":"The Linux kernel exposes system event counters to user-space programs by means of the perf_event_open
system call. This includes both hardware event counters (such as CPU cache events) and software events from the kernel (such as page faults and context switches). Anyone may use this system call to implement a profiler, but Linux readily offers one, perf
.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#preserving-artifacts","title":"Preserving artifacts","text":"By default IREE cleans up any temporary files it creates while running. Tools like perf, however, require those files exist even after the process has exited. The environment variable IREE_PRESERVE_DYLIB_TEMP_FILES
can be set to preserve the files. This is only needed for the CPU path when using the system loader.
export IREE_PRESERVE_DYLIB_TEMP_FILES=1\n
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#desktop-linux","title":"Desktop linux","text":"On desktop Linux we can use perf
. It is provided on most Linux distributions, for instance on Debian-based distributions do:
sudo apt install linux-perf\n
Run the program to be profiled, prepending its command line with perf record
. By default this will write the profile data to the current directory, ./perf.data
. Sometimes this isn't ideal, such as then the current directory is under version control. Explicit paths can be specified by -o
flag to direct the output of perf record
, and then by -i
flags to select the input of subsequent commands analyzing the profile. Example:
perf record -o /tmp/perf.data \\\n./tools/iree-benchmark-module \\\n--device=local-task \\\n... command-line arguments of iree-benchmark-module as usual ...\n
By default, this samples time spent. One may specify instead an event to sample by, with the -e
flag. For instance, to sample by L1 cache misses, one may do:
perf record -o /tmp/perf.data -e L1-dcache-load-misses \\\n./tools/iree-benchmark-module \\\n--device=local-task \\\n... command-line arguments of iree-benchmark-module as usual ...\n
perf list
dumps the list of event types.
Once you have recorded a profile, there are two main ways to analyze it: perf report
and perf annotate
.
perf report
breaks down the event counts by symbol. In the default case where what was sampled was time, this is just an ordinary profile by symbol name, no different than what could be viewed in other profilers such as Tracy. Where it gets really interesting is when the profile was recording a specific event type, as in the above -e L1-dcache-load-misses
example:
perf report -i /tmp/perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', Event count (approx.): 362571861\nOverhead Command Shared Object Symbol\n 61.53% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_31\n 13.30% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_11\n 2.11% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_13\n 1.90% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_19\n 1.54% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_25\n 1.49% cpu0 dylib_executablenzpx2Q.so [.] serving_default_ex_dispatch_5\n
perf annotate
breaks down the event counts by instruction. Again, in the default case where what was sampled was time, this is no different than what could be viewed in Tracy, and the real motivation to use perf
is when profiling by specific event types as in the above -e L1-dcache-load-misses
example:
perf annotate -i perf.data\n\nSamples: 6K of event 'L1-dcache-load-misses', 4000 Hz, Event count (approx.): 362571861\nserving_default_ex_dispatch_31 /tmp/dylib_executablenzpx2Q.so [Percent: local period]\n1.66 \u2502 movups -0x1000(%rdi),%xmm10\n 0.48 \u2502 movups -0x800(%rdi),%xmm9\n 0.82 \u2502 movups (%rdi),%xmm8\n 0.49 \u2502 movaps %xmm1,%xmm4\n 0.12 \u2502 shufps $0x0,%xmm1,%xmm4\n 0.14 \u2502 mulps %xmm5,%xmm4\n 0.28 \u2502 addps %xmm6,%xmm4\n 0.60 \u2502 movaps %xmm3,%xmm6\n 0.34 \u2502 shufps $0x0,%xmm3,%xmm6\n
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#warning","title":"Warning","text":"perf annotate
is even noisier than perf report
as it can be overly optimistic, depending on the CPU, to pin an event to a specific instruction. Typically, this works fairly well on x86 CPUs and less well on ARM CPUs and more generally on anything mobile. Even on a desktop x86 CPU, this is noisy, as the above example (recorded on a Skylake workstation) shows: it blamed a mulps %xmm5,%xmm4
instruction for a cache miss, which doesn't make sense as that instruction only touches registers.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#android","title":"Android","text":"On Android we can use simpleperf
. It's preinstalled on current Android userdebug
images, and part of the Android NDK.
In theory, as Android is Linux, it should be possible to use perf
. Unfortunately, perf
is difficult to build for Android. Fortunately, simpleperf
is readily available: it is preinstalled in Android userdebug
images, and it is part of the Android NDK.
First, we record on the device:
adb shell \\\nsimpleperf record -e raw-l1d-cache-refill -o /data/local/tmp/perf.data \\\n/data/local/tmp/iree-benchmark-module \\\n--device=local-task \\\n... command-line arguments of iree-benchmark-module as usual ...\n
Then pull the recorded data from the device, and analyze on the desktop. We assume that ${ANDROID_NDK}
points to the local copy of the Android NDK.
adb pull /data/local/tmp/perf.data /tmp/perf.data\n${ANDROID_NDK}/simpleperf/report.py -i /tmp/perf.data\n
This prints a breakdown of raw-l1d-cache-refill
events by symbol.
Like with perf
, a list of event types can be queried by the list
subcommand:
adb shell simpleperf list\n
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#no-support-for-annotate-by-cpu-event","title":"No support for annotate
by CPU event","text":"There is no simpleperf annotate
. The simpleperf
documentation lists a couple of ways of achieving the same thing.
However:
- The common case of annotating by time, as opposed to annotating by CPU event, is supported by Tracy.
- Annotating by CPU event inherently does not work, due to hardware limitations of the ARM CPUs found in Android devices. That is, the hardware is too imprecise at pinning an event to a particular instruction.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#interpreting-cpu-event-counts","title":"Interpreting CPU event counts","text":"","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#problems","title":"Problems","text":"There are multiple layers of complexity in interpreting CPU event counts.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-in-themselves-normal","title":"These events are in themselves normal","text":"The first difficulty is in the fact that most of these events are normal. So just knowing that they happened is not in itself actionable.
For example, if we learn that some code causes cache misses, that isn't big news: so does all code. Maybe this code has too many cache misses, but how many is too many? Maybe this code alone accounts for a large fraction of the overall total of the whole program, but maybe even that is normal, for instance if the code being studied is the 'hot' part of the program where a large fraction of overall time is spent?
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-hardware-dependent-and-under-documented","title":"These events are hardware-dependent and under-documented","text":"Many of these events have a meaning that varies between CPUs and that is difficult to characterize on any CPU, let alone in a way that applies to all CPUs.
For example, take the \"L2 data cache refill\". On ARM, with simpleperf
, that would be raw-l2d-cache-refill
. Questions:
- Is \u201cL2\u201d inclusive of \u201cL1\u201d?
- How many bytes are transferred per \u201crefill\u201d?
- Are accesses induced by speculative execution or by automatic pre-fetching counted in the same way as accesses induced by actual code execution?
The answers to all of the above questions are CPU-dependent. They may even vary between the CPU cores of the same Android device.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#these-events-are-imprecise-and-noisy-particularly-on-arm-cpus","title":"These events are imprecise and noisy, particularly on ARM CPUs","text":"Expect noise levels above 10% in many CPU event counts on ARM CPUs. Moreover, on ARM, as discussed above, there is inaccuracy in which instruction is blamed for which event, which will increase inaccuracy of per-symbol breakdowns for very cheap symbols (and makes perf annotate
impossible as noted above). Finally, be aware that some ARM CPUs may perform event count interpolation, so we may not have any access to true hardware counts.
","tags":["CPU"]},{"location":"developers/performance/profiling-cpu-events/#recommendations","title":"Recommendations","text":"Here is a workflow pattern that allows to make significant use of CPU event counts, despite all the problems noted above:
- Hypothesize that some code diff might help performance, and might help reducing the number of CPU events of a certain type, and that the two might be related.
- Benchmark with and without the code diff, on the same device, everything else being equal.
- Let your benchmark perform a fixed number of iterations, or, if using a benchmark termination condition of the form \"run until at least N seconds have elapsed\", carefully divide event counts by the actual number of iterations that were run.
- If the observed CPU event count difference is significant, go ahead and claim that your code diff probably helps with that aspect of CPU behavior.
Some things NOT to be done:
- Don\u2019t try to compare different metrics, not even when it seems obvious that they should satisfy a simple relationship, not even on the same CPU (e.g. \u201cL1 accesses should be greater than L2 accesses\u201d).
- Don\u2019t divide by some \u201ctotal\u201d metric to get some kinds of ratios. For example, don\u2019t try to compute a \u201ccache miss ratio\u201d as quotient of \u201ccache refill\u201d over \u201call cache accesses\u201d metrics. The first problem with that (even before we get to CPU-specific issues) is that that\u2019s rewarding increases to the \u201call cache accesses\u201d metrics, so if something bad happens in your codegen and your kernel ends up spilling a lot of registers to the stack, that\u2019s going to be a lot more accesses which will all be L1 hits so that\u2019ll help this ratio look better! So more generally, just try to minimize some CPU metrics (that count \u201ccostly\u201d events), not some more complex math expression formed from arithmetic on CPU metrics.
","tags":["CPU"]},{"location":"developers/performance/profiling-gpu-vulkan/","title":"Profiling GPUs using Vulkan","text":"Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used.
(TODO: add some pictures for each tool)
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#renderdoc","title":"RenderDoc","text":"Support for RenderDoc can be enabled by configuring cmake with -DIREE_ENABLE_RENDERDOC_PROFILING=ON
. When built in to IREE the profiling functionality is available for programmatic use via the iree_hal_device_profiling_begin
and iree_hal_device_profiling_end
APIs.
When using one of the standard IREE tools (iree-run-module
, iree-benchmark-module
, etc) the --device_profiling_mode=queue
flag can be passed to enable capture around the entire invocation (be careful when benchmarking as the recordings can be quite large!). The default capture file name can be specified with --device_profiling_file=foo.rdc
.
Capturing in the RenderDoc UI can be done by specifying the IREE tool or embedding application (iree-run-module
, etc) as the launch executable and adding all arguments as normal.
Capturing from the command line can be done using renderdoccmd
with the specified file appearing (by default) in the executable directory:
renderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=foo.rdc ...\nstat tools/foo.rdc\nrenderdoccmd capture tools/iree-run-module --device_profiling_mode=queue --device_profiling_file=/some/path/foo.rdc ...\nstat /some/path/foo.rdc\n
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#android-gpus","title":"Android GPUs","text":"There are multiple GPU vendors for the Android platforms, each offering their own tools. Android GPU Inspector (AGI) provides a cross-vendor solution. See the documentation for more details.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#build-android-app-to-run-iree","title":"Build Android app to run IREE","text":"In order to perform capture and analysis with AGI, you will need a full Android app. In IREE we have a simple Android native app wrapper to help package IREE core libraries together with a specific VM bytecode invocation into an Android app. The wrapper and its documentation are placed at tools/android/run_module_app/
.
For example, to package a module compiled from the following stablehlo-dot.mlir
as an Android app:
func @dot(%lhs: tensor<2x4xf32>, %rhs: tensor<4x2xf32>) -> tensor<2x2xf32> {\n %0 = \"stablehlo.dot\"(%lhs, %rhs) : (tensor<2x4xf32>, tensor<4x2xf32>) -> tensor<2x2xf32>\n return %0 : tensor<2x2xf32>\n}\n
# First compile into a VM bytecode module\n$ /path/to/iree/build/tools/iree-compile -- \\\n--iree-input-type=stablehlo \\\n--iree-hal-target-backends=vulkan-spirv \\\n/path/to/stablehlo-dot.mlir \\\n-o /tmp/stablehlo-dot.vmfb\n\n# Then package the Android app\n$ /path/to/iree/source/tools/android/run_module_app/build_apk.sh \\\n./build-apk \\\n--device vulkan \\\n--module /tmp/stablehlo-dot.vmfb \\\n--function dot \\\n--input=...\n
Where /path/to/input/file
is a file containing inputs to dot
, for example:
2x4xf32=[[1.0 2.0 3.0 4.0][5.0 6.0 7.0 8.0]]\n4x2xf32=[[9.0 10.0][11.0 12.0][13.0 14.0][15.0 16.0]]\n
The above will build an iree-run-module.apk
under the ./build-apk/
directory, which you can then install via adb install
.
build_apk.sh
needs the Android SDK and NDK internally, an easy way to manage them is by installing Android Studio. After installation, you will need to set up a few environment variables, which are printed at the beginning of build_apk.sh
invocation.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#capture-and-analyze-with-agi","title":"Capture and analyze with AGI","text":"You can follow AGI's Getting Started page to learn how to use it. In general the steps are:
- Install the latest AGI from https://github.com/google/agi/releases and launch.
- Fill in the \"Application\" field by searching the app. The line should read like
android.intent.action.MAIN:dev.iree.run_module/android.app.NativeActivity
. - Select start at beginning and choose a proper duration.
- Configure system profile to include all GPU counters.
- Start capture.
Generated traces are in the perfetto format. They can be viewed directly within AGI and also online in a browser at https://ui.perfetto.dev/, without needing an Android device.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#desktop-gpus","title":"Desktop GPUs","text":"Vulkan supports both graphics and compute, but most tools in the Vulkan ecosystem focus on graphics. As a result, some Vulkan profiling tools expect commands to correspond to a sequence of frames presented to displays via framebuffers. This means additional steps for IREE and other Vulkan applications that solely rely on headless compute. For graphics-focused tools, we need to wrap IREE's logic inside a dummy rendering loop in order to provide the necessary markers for these tools to perform capture and analysis.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#amd","title":"AMD","text":"For AMD GPUs, Radeon GPU Profiler (RGP) is the tool to understand fine details of how IREE GPU performs. See the documentation for details.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-gpu-vulkan/#nvidia","title":"NVIDIA","text":"For NVIDIA GPUs, NVIDIA Nsight Graphics is the tool to understand fine details of how IREE GPU performs. See the documentation for details.
","tags":["GPU","Vulkan"]},{"location":"developers/performance/profiling-with-tracy/","title":"Profiling with Tracy","text":"Tracy is a profiler that puts together in a single view both instrumentation and system profiling (sampling, systrace). It's key to understand the nuance here.
- Instrumentation is code built into the process being profiled, collecting timestamps at the start and end of \"zones\". Once it's enabled at build time, it typically just works \u2014 it is a part of our application logic just like anything else, so there's no reason why it would not work.
- Sampling and SysTrace rely on specific system features to collect information on what is actually running. These rely on OS and binary (ELF) file features, so they can take a bit more care to get to work properly.
There are two components to Tracy. They communicate over a TCP socket.
- The \"client\" is the program being profiled.
- The \"server\" is:
- Either the Tracy profiler UI (which we build as
iree-tracy-profiler
), - Or the Tracy command-line capture tool (
iree-tracy-capture
) that can save a trace for later loading in the Tracy profiler UI.
"},{"location":"developers/performance/profiling-with-tracy/#the-tracy-manual","title":"The Tracy manual","text":"The primary source of Tracy documentation, including for build instructions, is a PDF manual that's part of each numbered release. Download or view in browser.
"},{"location":"developers/performance/profiling-with-tracy/#overview","title":"Overview","text":"We will go through each steps below, but here is an overview. It highlights the simpler subset of instructions when only instrumentation is needed, vs. the additional steps needed when Sampling is also wanted.
Component Instrumentation only Instrumentation and Sampling Build Tracy capture (iree-tracy-capture
) Base instructions below for dependencies and build Same Build Tracy profiler (iree-tracy-profiler
) Base instructions below for dependencies and build Same plus capstone-next
instructions for CPU disassembly to work Build the IREE compiler (iree-compile
) for profiling your own modules Nothing particular Same Build the IREE compiler (iree-compile
) for profiling the compiler itself Also need CMake setting: IREE_ENABLE_COMPILER_TRACING
Same Compile your IREE module (run iree-compile
) Nothing particular Also need to pass --iree-llvmcpu-link-embedded=false
(and also, for llvm-cpu
backend, pass --iree-llvmcpu-debug-symbols=true
, but that is currently default). Build IREE device binaries (iree-run-module
etc) Base instructions below (CMake: set IREE_ENABLE_RUNTIME_TRACING
) Also need debug information (Set CMAKE_BUILD_TYPE
to RelWithDebInfo
). Run IREE device binaries loading your modules Nothing particular (May need to set the environment variable TRACY_NO_EXIT=1
for short-running benchmarks) Also need to set the environment variable IREE_PRESERVE_DYLIB_TEMP_FILES
and adjust device security settings or run as root depending on OS. Run Tracy capture (iree-tracy-capture
) to collect the trace If device!=host (e.g. Android), set up TCP port forwarding. Same Build IREE's own tests and benchmark suites with Tracy instrumentation As above, CMake: set IREE_ENABLE_RUNTIME_TRACING
. Also need the CMake setting IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER
so that --iree-llvmcpu-link-embedded=false
will be passed to iree-compile
."},{"location":"developers/performance/profiling-with-tracy/#install-dependencies","title":"Install dependencies","text":""},{"location":"developers/performance/profiling-with-tracy/#do-you-need-capstone-next","title":"Do you need capstone-next?","text":"You can skip this section if you don't need disassembly of CPU code.
Capstone is the disassembly framework used by Tracy. The default branch, which is what OS packages still distribute, is running a few years behind current CPU architectures.
Newer CPU architectures such as RISC-V, or newer extensions of existing architectures (e.g. new SIMD instructions in the ARM architecture) are typically only supported in the next
branch. If you need that support, check out and build that branch. Consider uninstalling any OS package for capstone
or otherwise ensure that your IREE build will pick up your next
branch build.
"},{"location":"developers/performance/profiling-with-tracy/#linux","title":"Linux","text":"If you haven't opted to build capstone-next
(see above section), install the OS package for capstone
now (Debian-based distributions):
sudo apt install libcapstone-dev\n
Install other dependencies:
sudo apt install libtbb-dev libzstd-dev libglfw3-dev libfreetype6-dev libgtk-3-dev\n
If you only build the command-line tool iree-tracy-capture
and not the graphical iree-tracy-profiler
, you can install only:
sudo apt install libtbb-dev libzstd-dev\n
The zstd version on Ubuntu 18.04 is old. You will need to install it from source from https://github.com/facebook/zstd.git
"},{"location":"developers/performance/profiling-with-tracy/#mac","title":"Mac","text":"If you haven't opted to build capstone-next
(see above section), install the system capstone
now:
brew install capstone\n
Install other dependencies:
brew install pkg-config glfw freetype tbb zstd\n
"},{"location":"developers/performance/profiling-with-tracy/#build-the-tracy-tools","title":"Build the Tracy tools","text":"A CMake-based build system for Tracy is maintained as part of IREE. In your IREE desktop build directory, set the following CMake option:
cmake -DIREE_BUILD_TRACY=ON -DIREE_ENABLE_LLD=ON .\n
That enables building the Tracy server tools, iree-tracy-profiler
and iree-tracy-capture
, introduced above. It also enables building the tool iree-tracy-csvexport
which can be used to export a captured trace as a CSV file (see Section 6 \"Exporting zone statistics to CSV\" in the Tracy manual).
If profiling on Android/ARM, you might need the patch discussed in the next paragraph.
Consider building without assertions (cmake -DIREE_ENABLE_ASSERTIONS=OFF
). At least iree-tracy-profiler
has some faulty assertions that can cause the profiler UI to crash during normal usage.
Rebuild, either everything or just these specific targets:
cmake --build . --target iree-tracy-profiler iree-tracy-capture iree-tracy-csvexport\n
This should have created the iree-tracy-profiler
, iree-tracy-capture
, and iree-tracy-csvexport
binaries:
$ find . -name iree-tracy-*\n./tracy/iree-tracy-profiler\n./tracy/iree-tracy-capture\n./tracy/iree-tracy-csvexport\n
"},{"location":"developers/performance/profiling-with-tracy/#build-the-iree-compiler-iree-compile","title":"Build the IREE compiler (iree-compile
)","text":"Most people don't need to rebuild iree-compile
at all for Tracy and can skip this section.
If you want to profile iree-compile
itself as opposed to just profiling modules compiled with it, then rebuild it with the CMake setting IREE_ENABLE_COMPILER_TRACING
set to ON
.
"},{"location":"developers/performance/profiling-with-tracy/#compile-your-iree-module-run-iree-compile","title":"Compile your IREE module (run iree-compile
)","text":"If you only want Instrumentation and not Sampling then you don't need anything particular here. Just run iree-compile
as usual.
"},{"location":"developers/performance/profiling-with-tracy/#additional-steps-for-sampling","title":"Additional steps for Sampling","text":"In order for Sampling to work with your compiled modules, add this flag to your iree-compile
command line: --iree-llvmcpu-link-embedded=false
.
For the llvm-cpu
target backend, sampling features also rely on debug information in the compiled module, enabled by --iree-llvmcpu-debug-symbols=true
, but that is currently the default.
When building IREE's own test and benchmark suites, if Tracy Sampling support is wanted, set the CMake setting IREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER
to ON
. It has the effect of passing that --iree-llvmcpu-link-embedded=false
when compiling test/benchmark modules.
"},{"location":"developers/performance/profiling-with-tracy/#build-iree-device-binaries-with-tracy-instrumentation-clients","title":"Build IREE device binaries with Tracy instrumentation (\"clients\")","text":"Set the CMake setting IREE_ENABLE_RUNTIME_TRACING
to ON
and rebuild IREE device binaries, e.g.
cd iree-device-build-dir\ncmake -DIREE_ENABLE_RUNTIME_TRACING=ON .\ncmake --build .\n
Tip - python bindings
The iree-runtime
Python package includes instrumented tools too. Set the IREE_PY_RUNTIME=tracy
environment variable to use them:
$ python -m pip install iree-runtime\n$ IREE_PY_RUNTIME=tracy iree-run-module ...\n
See this section in the Python bindings documentation for more details.
"},{"location":"developers/performance/profiling-with-tracy/#additional-steps-for-sampling_1","title":"Additional steps for Sampling","text":"In order for Sampling features to work, make sure that binaries contain debug information. That usually means changing the CMAKE_BUILD_TYPE
to RelWithDebInfo
instead of Release
.
In your IREE device build directory, set the following CMake options:
cd iree-device-build-dir\ncmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .\n
"},{"location":"developers/performance/profiling-with-tracy/#running-the-profiled-program","title":"Running the profiled program","text":"The basic recipe is to just run your program as usual on the device and, while it is running, run iree-tracy-capture
on the host to connect to it.
In the typical case of a short-running benchmark, one usually runs with the environment variable TRACY_NO_EXIT
defined so that the benchmark does not exit until iree-tracy-capture
has connected to it.
Example:
TRACY_NO_EXIT=1 /data/local/tmp/iree-benchmark-module ... (usual flags)\n
"},{"location":"developers/performance/profiling-with-tracy/#additional-steps-for-sampling_2","title":"Additional steps for Sampling","text":"In order for Sampling to work, the IREE compiled module code mapping must still be accessible by the time Tracy tries to read symbols code. This requires setting the environment variable IREE_PRESERVE_DYLIB_TEMP_FILES
. It is easiest to set it to 1
but one may also set it to an explicit path where to preserve the temporary files.
Example:
TRACY_NO_EXIT=1 IREE_PRESERVE_DYLIB_TEMP_FILES=1 /data/local/tmp/iree-benchmark-module ... (usual flags)\n
Tracing doesn't work properly on VMs (see \"Problematic Platforms / Virtual Machines\" section 2.1.6.4 of the manual). To get sampling, you should run the profiled program on bare metal.
"},{"location":"developers/performance/profiling-with-tracy/#operating-system-settings-required-for-sampling-and-systrace","title":"Operating system settings required for Sampling and SysTrace","text":""},{"location":"developers/performance/profiling-with-tracy/#desktop-linux","title":"Desktop Linux","text":"On desktop Linux, the profiled application must be run as root, e.g. with sudo
. Otherwise, profile data will lack important components.
"},{"location":"developers/performance/profiling-with-tracy/#android","title":"Android","text":"When profiling on an Android device, in order to get the most useful information in the trace, tweak system permissions as follows before profiling. This needs to be done again after every reboot of the Android device.
From your desktop, get a shell on the Android device:
adb shell\n
The following commands are meant to be run from that Android device shell. First, get root access for this shell:
$ su\n#\n
Now run the following commands as root on the Android device:
setenforce 0\nmount -o remount,hidepid=0 /proc\necho 0 > /proc/sys/kernel/perf_event_paranoid\necho 0 > /proc/sys/kernel/kptr_restrict\n
Note: in order for this to work, the device needs to be rooted, which means that the above su
command must succeed. This is sometimes confused with the adb root
command, but that's not the same. adb root
restarts the adbd
daemon as root, which causes device shells to be root shells by default. This is unnecessary here and we don't recommend it: real Android applications never run as root, so Tracy/Android has to support running benchmarks as regular user and it's best to stick to this for the sake of realistic benchmarks. Internally, Tracy executes su
commands to perform certain actions, so it too relies on the device being rooted without relying on the benchmark process being run as root.
"},{"location":"developers/performance/profiling-with-tracy/#resource_exhausted-failed-to-open-file-issue","title":"\"RESOURCE_EXHAUSTED; failed to open file\" issue","text":"This is a known issue with how tracy operates. One way to workaround it is to manually increase the total number of files that can be kept opened simultaneously and run the benchmark command with that setting:
sudo sh -c \"ulimit -n <bigNum> && <myTracyInstrumentedProgram>\"\n
Explanation:
Tracy keeps a number of file descriptors open that, depending on the machine and its settings, may exceed the limit allowed by the system resulting in iree
failing to open more files. In particular, it is common to have a relatively low limit when running with sudo
.
"},{"location":"developers/performance/profiling-with-tracy/#running-the-tracy-capture-cli-connecting-and-saving-profiles","title":"Running the Tracy Capture CLI, connecting and saving profiles","text":"While the program that you want to profile is still running (thanks to TRACY_NO_EXIT=1
), start the Tracy capture tool in another terminal. From the IREE build directory:
tracy/iree-tracy-capture -o myprofile.tracy\nConnecting to 127.0.0.1:8086...\n
It should connect to the IREE client and save the output to myprofile.tracy that can be visualized by the client below. You can start the capture tool first to make sure you don't miss any capture events.
Note that the connection uses TCP port 8086. If the Tracy-instrumented program is running on a separate machine, this port needs to be forwarded. In particular, when benchmarking on Android, this is needed:
adb forward tcp:8086 tcp:8086\n
"},{"location":"developers/performance/profiling-with-tracy/#running-the-tracy-profiler-ui-connecting-and-visualizing","title":"Running the Tracy profiler UI, connecting and visualizing","text":"If you have previously captured a tracy file (previous section), this command should succeed loading it (from the IREE build directory):
tracy/iree-tracy-profiler myprofile.tracy\n
Alternatively, while the program that you want to profile is still running (possibly thanks to TRACY_NO_EXIT=1
), the Tracy profiler can connect to it directly (so it is not required to capture the trace into a file): just running
tracy/iree-tracy-profiler\n
should show a dialog offering to connect to a client i.e. a profiled program:
If connecting doesn't work:
- If the profiled program is on a separate machine, make sure you've correctly set up port forwarding.
- On Android, the
adb forward
may need to be run again. - Make sure that the profiled program is still running. Do you need
TRACY_NO_EXIT=1
? - Kill the profiled program and restart it.
You should now start seeing a profile. The initial view should look like this:
Before going further, take a second to check that your recorded profile data has all the data that it should have. Permissions issues, as discussed above, could cause it to lack \"sampling\" or \"CPU data\" information, particularly on Android. For example, here is what the initial view looks like when one forgot to run the profiled program as root on Desktop Linux (where running as root is required, as explained above):
Notice how the latter screenshot is lacking the following elements:
- No 'CPU data' header on the left side, with the list of all CPU cores. The 'CPU usage' graph is something else.
- No 'ghost' icon next to the 'Main thread' header.
Click the 'Statistics' button at the top. It will open a window like this:
See how the above screenshot has two radio buttons at the top: 'Instrumentation' and 'Sampling'. At this point, if you don't see the 'Sampling' radio button, you need to resolve that first, as discussed above about possible permissions issues.
These 'Instrumentation' and 'Sampling' statistics correspond to the two kinds of data that Tracy collects about your program. In the Tracy main view, they correspond, respectively, to 'instrumentation' and 'ghost' zones. Refer to the Tracy PDF manual for a general introduction to these concepts. For each thread, the ghost icon toggles the view between these two kinds of zones.
Back to the main view, look for the part of the timeline that is of interest to you. Your area of interest might not be on the Main thread. In fact, it might be on a thread that's not visible in the initial view at all. To pan around with the mouse, hold the right mouse button down (or its keyboard equivalent on macOS). Alternatively, look for the 'Frame' control at the top of the Tracy window. Use the 'next frame' arrow button until more interesting threads appear.
IREE module code tends to run on a thread whose name contains the word worker
.
Once you have identified the thread of interest, you typically want to click its ghost icon to view its \"ghost\" (i.e. sampling) zones.
Here is what you should get when clicking on a ghost zone:
The percentages column to the left of the disassembly shows where time is being spent. This is unique to the sampling data (ghost zones) and has no equivalent in the instrumentation data (instrumentation zones). Here is what we get clicking on the corresponding instrumentation zone:
This still has a 'Source' button but that only shows the last C++ caller that had explicit Tracy information, so here we see a file under iree/hal
whereas the Ghost zone saw into the IREE compiled module that that calls into, with the source view pointing to the .mlir
file.
"},{"location":"developers/performance/profiling-with-tracy/#configuring-tracy-instrumentation","title":"Configuring Tracy instrumentation","text":"Set IREE's IREE_TRACING_MODE
value (defined in iree/base/tracing.h) to adjust which tracing features, such as allocation tracking and callstacks, are enabled.
"},{"location":"developers/performance/profiling/","title":"Profiling overview","text":"IREE benchmarking gives us an accurate and reproducible view of program performance at specific levels of granularity. To analyze system behavior in more depth, there are various ways to profile IREE.
"},{"location":"developers/performance/profiling/#cpu-cache-and-other-cpu-event-profiling","title":"CPU cache and other CPU event profiling","text":"For some advanced CPU profiling needs such as querying CPU cache and other events, one may need to use some OS-specific profilers. See Profiling CPUs.
"},{"location":"developers/performance/profiling/#vulkan-gpu-profiling","title":"Vulkan GPU Profiling","text":"Tracy offers great insights into CPU/GPU interactions and Vulkan API usage details. However, information at a finer granularity, especially inside a particular shader dispatch, is missing. To supplement general purpose tools like Tracy, vendor-specific tools can be used. Refer to Profiling GPUs using Vulkan.
"},{"location":"developers/performance/profiling/#tracy","title":"Tracy","text":"Tracy is a profiler that's been used for a wide range of profiling tasks on IREE. Refer to Profiling with Tracy.
"},{"location":"guides/","title":"Guides","text":""},{"location":"guides/#ml-frameworks","title":"ML frameworks","text":"Start here: ML frameworks overview
Guides for specific frameworks:
- TensorFlow and TensorFlow Lite
- JAX
- PyTorch
"},{"location":"guides/#deployment-configurations","title":"Deployment configurations","text":"Start here: Deplyment configurations overview
Guides for specific configurations:
- CPU for general purpose CPU deployment
- CPU - Bare-Metal with minimal platform dependencies
- GPU - Vulkan for cross-platform usage and interop with graphics applications
- GPU - CUDA for NVIDIA-specific solutions
- GPU - ROCm for AMD-specific solutions
- GPU - Metal for running on Apple hardware
"},{"location":"guides/deployment-configurations/","title":"Deployment configurations","text":"IREE provides a flexible set of tools for various deployment scenarios. Fully featured environments can use IREE to load programs on demand and to take advantage of multi-threaded hardware, while embedded systems can bypass IREE's runtime entirely or interface with custom accelerators.
"},{"location":"guides/deployment-configurations/#stable-configurations","title":"Stable configurations","text":" - CPU for general purpose CPU deployment
- CPU - Bare-Metal with minimal platform dependencies
- GPU - Vulkan for cross-platform usage and interop with graphics applications
- GPU - CUDA for NVIDIA-specific solutions
- GPU - ROCm for AMD-specific solutions
- GPU - Metal for running on Apple hardware
These are just the most stable configurations IREE supports. Feel free to reach out on any of IREE's communication channels if you have questions about a specific platform, hardware accelerator, or set of system features.
"},{"location":"guides/deployment-configurations/#compiler-target-backends","title":"Compiler target backends","text":"Compiler target backends are used to generate executable code for hardware APIs and device architectures. Compiler targets may implement special optimizations or generate distinct code for certain device/architecture/performance profiles.
When compiling programs, a list of target backends must be specified via
--iree-hal-target-backends=
(command-line) target_backends=[...]
(Python)
Target backend Description Compatible HAL devices llvm-cpu
Code generation for CPU-like devices supported by LLVM local-sync
, local-task
vmvx
Portable interpreter powered by a microkernel library local-sync
, local-task
vulkan
orvulkan-spirv
Portable GPU support via SPIR-V for Vulkan vulkan
cuda
NVIDIA GPU support via PTX for CUDA cuda
metal
ormetal-spirv
GPU support on Apple platforms via MSL for Metal metal
rocm
Experimental AMD GPU support via HSACO for ROCm rocm
webgpu-wgsl
Experimental GPU support on the Web via WGSL for WebGPU webgpu
Tip - listing available backends
The list of compiler target backends can be queried:
Command-linePython bindings $ iree-compile --iree-hal-list-target-backends\n\nRegistered target backends:\n cuda\n llvm-cpu\n metal\n metal-spirv\n rocm\n vmvx\n vmvx-inline\n vulkan\n vulkan-spirv\n
iree.compiler.query_available_targets()\n\n['cuda',\n 'llvm-cpu',\n 'metal',\n 'metal-spirv',\n 'rocm',\n 'vmvx',\n 'vmvx-inline',\n 'vulkan',\n 'vulkan-spirv']\n
"},{"location":"guides/deployment-configurations/#runtime-hal-driversdevices","title":"Runtime HAL drivers/devices","text":"Runtime HAL devices call into hardware APIs to load and run executable code. Devices may use multithreading or other system resources, depending on their focus and the build configuration.
HAL device Description local-sync
Synchronous local CPU device with inline execution local-task
Multithreaded local CPU device using a 'task' executor vulkan
Portable GPU execution using the Vulkan API cuda
NVIDIA GPU execution using CUDA metal
GPU execution on Apple platforms using Metal rocm
Experimental AMD GPU execution using ROCm webgpu
Experimental GPU execution on the web using WebGPU Additional HAL drivers can also be defined external to the core project via IREE_EXTERNAL_HAL_DRIVERS
.
"},{"location":"guides/deployment-configurations/bare-metal/","title":"Running on a bare-metal platform","text":"IREE supports model execution via CPU on bare-metal platforms. Bare metal platforms have no operating system support, and executables are built using machine-specific linker scripts and/or board support packages (BSPs).
Bare-metal deployment typically uses IREE's LLVM compiler target backend much like the CPU configuration, but using a limited subset of IREE's CPU HAL driver code at runtime to load and execute compiled programs.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#prerequisites","title":"Prerequisites","text":"Out-of-tree bare-metal platform tools and source code for the system should be ready, such as
- Compilation toolchain
- Platform linker script
- Firmware libraries
Please follow the instructions to retrieve the IREE compiler.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compile-the-model-for-bare-metal","title":"Compile the model for bare-metal","text":"The model can be compiled with the following command:
iree-compile \\\n--iree-stream-partitioning-favor=min-peak-memory \\\n--iree-hal-target-backends=llvm-cpu \\\n--iree-llvmcpu-target-triple=x86_64-pc-linux-elf \\\n--iree-llvmcpu-debug-symbols=false \\\nsamples/models/simple_abs.mlir \\\n-o /tmp/simple_abs_cpu.vmfb\n
In which
--iree-stream-partitioning-favor=min-peak-memory
: Optimize for minimum peak memory usage at the cost of concurrency - include when targeting single-threaded execution to reduce memory consumption. --iree-hal-target-backends=llvm-cpu
: Compile using the LLVM CPU target --iree-llvmcpu-target-triple
: Use the <arch>-pc-linux-elf
LLVM target triple so the artifact has a fixed ABI to be rendered by the elf_module library --iree-llvmcpu-debug-symbols=false
: To reduce the artifact size
See generate.sh for example command-line instructions of some common architectures.
You can replace the MLIR file with the other MLIR model files, following the instructions.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#compiling-the-bare-metal-model-for-static-library-support","title":"Compiling the bare-metal model for static-library support","text":"See the static_library demo sample for an example and instructions on running a model with IREE's static_library_loader
.
By default, the demo targets the host machine when compiling. To produce a bare-metal compatible model, run iree-compile
as in the previous example and add the additional --iree-llvmcpu-static-library-output-path=
flag to specify the static library destination. This will produce a .h/.o
file to link directly into the target application.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#build-bare-metal-runtime-from-source","title":"Build bare-metal runtime from source","text":"A few CMake options and macros should be set to build a subset of IREE runtime libraries compatible with the bare-metal platform. We assume there's no multi-thread control nor system library support in the bare-metal system. The model execution is in a single-thread synchronous fashion.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#set-cmake-options","title":"Set CMake options","text":"# Build the IREE runtime only\nset(IREE_BUILD_COMPILER OFF)\n\n# Tell CMake to skip targeting a specific operating system\nset(CMAKE_SYSTEM_NAME Generic)\n\n# Disable multi-thread library support\nset(IREE_ENABLE_THREADING OFF)\n\n# Only enable the local synchronous HAL driver\nset(IREE_HAL_DRIVER_DEFAULTS OFF)\nset(IREE_HAL_DRIVER_LOCAL_SYNC ON)\n\n# Only enable some executable loaders\nset(IREE_HAL_EXECUTABLE_LOADER_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF ON)\nset(IREE_HAL_EXECUTABLE_LOADER_VMVX_MODULE ON)\n\n# Only enable the embedded ELF executable plugin\nset(IREE_HAL_EXECUTABLE_PLUGIN_DEFAULTS OFF)\nset(IREE_HAL_EXECUTABLE_PLUGIN_EMBEDDED_ELF ON)\n\n# Disable tests until IREE supports running them on bare-metal platforms\nset(IREE_BUILD_TESTS OFF)\n\n# Build samples\nset(IREE_BUILD_SAMPLES ON)\n
Todo
Clean the list up after #6353 is fixed.
Also, set the toolchain-specific cmake file to match the tool path, target architecture, target abi, linker script, system library path, etc.
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#define-iree-macros","title":"Define IREE macros","text":"These macros should be defined, either in C/C++ or via CMake options like
set(MY_FLAGS \"-DIREE_PLATFORM_GENERIC=1\")\nset(CMAKE_C_FLAGS ${MY_FLAGS} ${CMAKE_C_FLAGS})\nset(CMAKE_CXX_FLAGS ${MY_FLAGS} ${CMAKE_CXX_FLAGS})\n
Macro Description IREE_PLATFORM_GENERIC
Let IREE build the runtime library without targeting a specific platform. IREE_SYNCHRONIZATION_DISABLE_UNSAFE=1
Disable thread synchronization support.Must only be used if there's a single thread. IREE_FILE_IO_ENABLE=0
Disable file I/O. IREE_TIME_NOW_FN
A function to return the system time. For the bare-metal systems, it can be set as IREE_TIME_NOW_FN=\\\"\\{ return 0;\\}\\\"
as there's no asynchronous wait handling. IREE_WAIT_UNTIL_FN
A function to wait until the given time in nanoseconds. Must match the signature bool(uint64_t nanos)
and return false if the wait failed. Examples of how to setup the CMakeLists.txt and .cmake file:
- IREE RISC-V toolchain cmake
- IREE Bare-Metal Arm Sample
- IREE Bare-Metal RV32 Sample
","tags":["CPU"]},{"location":"guides/deployment-configurations/bare-metal/#bare-metal-execution-example","title":"Bare-metal execution example","text":"See simple_embedding for generic platform to see how to use the IREE runtime library to build/run the IREE model for the bare-metal target.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/","title":"CPU deployment","text":"IREE supports efficient program execution on CPU devices by using LLVM to compile all dense computations in each program into highly optimized CPU native instruction streams, which are embedded in one of IREE's deployable formats.
To compile a program for CPU execution, pick one of IREE's supported executable formats:
Executable Format Description embedded ELF portable, high performance dynamic library system library platform-specific dynamic library (.so, .dll, etc.) VMVX reference target At runtime, CPU executables can be loaded using one of IREE's CPU HAL drivers:
local-task
: asynchronous, multithreaded driver built on IREE's \"task\" system local-sync
: synchronous, single-threaded driver that executes work inline
Todo
Add IREE's CPU support matrix: what architectures are supported; what architectures are well optimized; etc.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#prerequisites","title":"Prerequisites","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core iree-compiler
package includes the LLVM-based CPU compiler:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install iree-compiler\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade iree-compiler\n
Tip
iree-compile
is installed to your python module installation path. If you pip install with the user mode, it is under ${HOME}/.local/bin
, or %APPDATA%\\Python
on Windows. You may want to include the path in your system's PATH
environment variable:
export PATH=${HOME}/.local/bin:${PATH}\n
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation or iOS cross-compilation page if you are cross compiling for a mobile device. The llvm-cpu
compiler backend is compiled in by default on all platforms.
Ensure that the IREE_TARGET_BACKEND_LLVM_CPU
CMake option is ON
when configuring for the host.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#get-the-iree-runtime","title":"Get the IREE runtime","text":"You will need to get an IREE runtime that supports the local CPU HAL driver, along with the appropriate executable loaders for your application.
You can check for CPU support by looking for the local-sync
and local-task
drivers:
$ iree-run-module --list_drivers\n\n cuda: CUDA (dynamic)\n local-sync: Local execution using a lightweight inline synchronous queue\n local-task: Local execution using the IREE multithreading task system\n vulkan: Vulkan 1.x (dynamic)\n
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. The local CPU HAL drivers are compiled in by default on all platforms.
Ensure that the IREE_HAL_DRIVER_LOCAL_TASK
and IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF
(or other executable loader) CMake options are ON
when configuring for the target.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-and-run-a-program","title":"Compile and run a program","text":"With the requirements out of the way, we can now compile a model and run it.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run the following command to compile with the llvm-cpu
target:
iree-compile \\\n--iree-hal-target-backends=llvm-cpu \\\nmobilenet_iree_input.mlir -o mobilenet_cpu.vmfb\n
Tip - CPU targets
The --iree-llvmcpu-target-triple
flag tells the compiler to generate code for a specific type of CPU. You can see the list of supported targets with iree-compile --iree-llvmcpu-list-targets
, or pass \"host\" to let LLVM infer the triple from your host machine (e.g. x86_64-linux-gnu
).
$ iree-compile --iree-llvmcpu-list-targets\n\n Registered Targets:\n aarch64 - AArch64 (little endian)\n aarch64_32 - AArch64 (little endian ILP32)\n aarch64_be - AArch64 (big endian)\n arm - ARM\n arm64 - ARM64 (little endian)\n arm64_32 - ARM64 (little endian ILP32)\n armeb - ARM (big endian)\n riscv32 - 32-bit RISC-V\n riscv64 - 64-bit RISC-V\n wasm32 - WebAssembly 32-bit\n wasm64 - WebAssembly 64-bit\n x86 - 32-bit X86: Pentium-Pro and above\n x86-64 - 64-bit X86: EM64T and AMD64\n
Tip - CPU features
The --iree-llvmcpu-target-cpu-features
flag tells the compiler to generate code using certain CPU \"features\", like SIMD instruction sets. Like the target triple, you can pass \"host\" to this flag to let LLVM infer the features supported by your host machine.
","tags":["CPU"]},{"location":"guides/deployment-configurations/cpu/#run-a-compiled-program","title":"Run a compiled program","text":"In the build directory, run the following command:
tools/iree-run-module \\\n--device=local-task \\\n--module=mobilenet_cpu.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
","tags":["CPU"]},{"location":"guides/deployment-configurations/gpu-cuda/","title":"GPU deployment using CUDA","text":"IREE can accelerate model execution on Nvidia GPUs using CUDA.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#prerequisites","title":"Prerequisites","text":"In order to use CUDA to drive the GPU, you need to have a functional CUDA environment. It can be verified by the following steps:
nvidia-smi | grep CUDA\n
If nvidia-smi
does not exist, you will need to install the latest CUDA Toolkit SDK.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core iree-compiler
package includes the CUDA compiler:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install iree-compiler\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade iree-compiler\n
Tip
iree-compile
is installed to your python module installation path. If you pip install with the user mode, it is under ${HOME}/.local/bin
, or %APPDATA%\\Python
on Windows. You may want to include the path in your system's PATH
environment variable:
export PATH=${HOME}/.local/bin:${PATH}\n
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build the IREE compiler, then enable the CUDA compiler target with the IREE_TARGET_BACKEND_CUDA
option.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#get-the-iree-runtime","title":"Get the IREE runtime","text":"Next you will need to get an IREE runtime that includes the CUDA HAL driver.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE from source, then enable the CUDA HAL driver with the IREE_HAL_DRIVER_CUDA
option.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"With the compiler and runtime ready, we can now compile programs and run them on GPUs.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run one of the following commands to compile:
iree-compile \\\n--iree-hal-target-backends=cuda \\\n--iree-hal-cuda-llvm-target-arch=<...> \\\nmobilenet_iree_input.mlir -o mobilenet_cuda.vmfb\n
Note that a cuda target architecture (iree-hal-cuda-llvm-target-arch
) of the form sm_<arch_number>
is needed to compile towards each GPU architecture. If no architecture is specified then we will default to sm_35
.
Here is a table of commonly used architectures:
CUDA GPU Target Architecture Nvidia K80 sm_35
Nvidia P100 sm_60
Nvidia V100 sm_70
Nvidia A100 sm_80
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-cuda/#run-a-compiled-program","title":"Run a compiled program","text":"Run the following command:
iree-run-module \\\n--device=cuda \\\n--module=mobilenet_cuda.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
","tags":["GPU","CUDA"]},{"location":"guides/deployment-configurations/gpu-metal/","title":"GPU deployment using Metal","text":"Documentation coming soon!
","tags":["GPU","iOS"]},{"location":"guides/deployment-configurations/gpu-rocm/","title":"GPU deployment using ROCm","text":"IREE can accelerate model execution on AMD GPUs using ROCm.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#prerequisites","title":"Prerequisites","text":"In order to use ROCm to drive the GPU, you need to have a functional ROCm environment. It can be verified by the following steps:
rocm-smi | grep rocm\n
If rocm-smi
does not exist, you will need to install the latest ROCm Toolkit SDK for Windows or Linux.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-compiler","title":"Get the IREE compiler","text":"","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Currently ROCm is NOT supported for the Python interface.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build the IREE compiler, then enable the ROCm compiler target with the IREE_TARGET_BACKEND_ROCM
option.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#get-the-iree-runtime","title":"Get the IREE runtime","text":"Next you will need to get an IREE runtime that includes the ROCm HAL driver.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE from source, then enable the experimental ROCm HAL driver with the IREE_EXTERNAL_HAL_DRIVERS=rocm
option.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-and-run-a-program-model","title":"Compile and run a program model","text":"With the compiler and runtime ready, we can now compile programs and run them on GPUs.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run one of the following commands to compile:
iree-compile \\\n--iree-hal-target-backends=rocm \\\n--iree-rocm-target-chip=<...> \\\n--iree-rocm-link-bc=true \\\n--iree-rocm-bc-dir=<...> \\\nmobilenet_iree_input.mlir -o mobilenet_rocm.vmfb\n
Note ROCm Bitcode Dir (iree-rocm-bc-dir
) path is required. If the system you are compiling IREE in has ROCm installed, then the default value of /opt/rocm/amdgcn/bitcode
will usually suffice. If you intend on building ROCm compiler in a non-ROCm capable system, please set iree-rocm-bc-dir
to the absolute path where you might have saved the amdgcn bitcode.
Note that a ROCm target chip (iree-rocm-target-chip
) of the form gfx<arch_number>
is needed to compile towards each GPU architecture. If no architecture is specified then we will default to gfx908
.
Here is a table of commonly used architectures:
AMD GPU Target Chip AMD MI25 gfx900
AMD MI50 gfx906
AMD MI60 gfx906
AMD MI100 gfx908
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-rocm/#run-a-compiled-program","title":"Run a compiled program","text":"Run the following command:
iree-run-module \\\n--device=rocm \\\n--module=mobilenet_rocm.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
","tags":["GPU"]},{"location":"guides/deployment-configurations/gpu-vulkan/","title":"GPU deployment using Vulkan","text":"IREE can accelerate model execution on GPUs via Vulkan, a low-overhead graphics and compute API. Vulkan is cross-platform: it is available on many operating systems, including Android, Linux, and Windows. Vulkan is also cross-vendor: it is supported by most GPU vendors, including AMD, ARM, Intel, NVIDIA, and Qualcomm.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#support-matrix","title":"Support matrix","text":"As IREE and the compiler ecosystem it operates within matures, more target specific optimizations will be implemented. At this stage, expect reasonable performance across all GPUs and for improvements to be made over time for specific vendors and architectures.
GPU Vendor Category Performance Focus Architecture ARM Mali GPU Mobile Good Valhall Qualcomm Adreno GPU Mobile Reasonable 640+ AMD GPU Desktop/server Reasonable - NVIDIA GPU Desktop/server Good -","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#prerequisites","title":"Prerequisites","text":"In order to use Vulkan to drive the GPU, you need to have a functional Vulkan environment. IREE requires Vulkan 1.1 on Android and 1.2 elsewhere. It can be verified by the following steps:
Android Linux Windows Android mandates Vulkan 1.1 support since Android 10. You just need to make sure the device's Android version is 10 or higher.
Run the following command in a shell:
vulkaninfo | grep apiVersion\n
If vulkaninfo
does not exist, you will need to install the latest Vulkan SDK. Installing via LunarG's package repository is recommended, as it places Vulkan libraries and tools under system paths so it's easy to discover.
If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.
Run the following command in a shell:
vulkaninfo | grep apiVersion\n
If vulkaninfo
does not exist, you will need to install the latest Vulkan SDK.
If the listed version is lower than Vulkan 1.2, you will need to update the driver for your GPU.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-compiler","title":"Get the IREE compiler","text":"Vulkan expects the program running on GPU to be expressed by the SPIR-V binary exchange format, which the model must be compiled into.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#download-the-compiler-from-a-release","title":"Download the compiler from a release","text":"Python packages are regularly published to PyPI. See the Python Bindings page for more details. The core iree-compiler
package includes the SPIR-V compiler:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install iree-compiler\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade iree-compiler\n
Tip
iree-compile
is installed to your python module installation path. If you pip install with the user mode, it is under ${HOME}/.local/bin
, or %APPDATA%\\Python
on Windows. You may want to include the path in your system's PATH
environment variable:
export PATH=${HOME}/.local/bin:${PATH}\n
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-compiler-from-source","title":"Build the compiler from source","text":"Please make sure you have followed the Getting started page to build IREE for your host platform and the Android cross-compilation page if you are cross compiling for Android. The SPIR-V compiler backend is compiled in by default on all platforms.
Ensure that the IREE_TARGET_BACKEND_VULKAN_SPIRV
CMake option is ON
when configuring for the host.
Tip
iree-compile
will be built under the iree-build/tools/
directory. You may want to include this path in your system's PATH
environment variable.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#get-the-iree-runtime","title":"Get the IREE runtime","text":"Next you will need to get an IREE runtime that supports the Vulkan HAL driver.
You can check for Vulkan support by looking for a matching driver and device:
$ iree-run-module --list_drivers\n\n cuda: CUDA (dynamic)\n local-sync: Local execution using a lightweight inline synchronous queue\n local-task: Local execution using the IREE multithreading task system\n vulkan: Vulkan 1.x (dynamic)\n
$ iree-run-module --list_devices\n\n cuda://GPU-00000000-1111-2222-3333-444444444444\n local-sync://\n local-task://\n vulkan://00000000-1111-2222-3333-444444444444\n
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#build-the-runtime-from-source","title":"Build the runtime from source","text":"Please make sure you have followed the Getting started page to build IREE for Linux/Windows and the Android cross-compilation page for Android. The Vulkan HAL driver is compiled in by default on non-Apple platforms.
Ensure that the IREE_HAL_DRIVER_VULKAN
CMake option is ON
when configuring for the target.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-and-run-a-program","title":"Compile and run a program","text":"With the SPIR-V compiler and Vulkan runtime, we can now compile programs and run them on GPUs.
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#compile-a-program","title":"Compile a program","text":"The IREE compiler transforms a model into its final deployable format in many sequential steps. A model authored with Python in an ML framework should use the corresponding framework's import tool to convert into a format (i.e., MLIR) expected by the IREE compiler first.
Using MobileNet v2 as an example, you can download the SavedModel with trained weights from TensorFlow Hub and convert it using IREE's TensorFlow importer. Then run the following command to compile with the vulkan-spirv
target:
iree-compile \\\n--iree-hal-target-backends=vulkan-spirv \\\n--iree-vulkan-target-triple=<...> \\\nmobilenet_iree_input.mlir -o mobilenet_vulkan.vmfb\n
Note
A target triple of the form <vendor/arch>-<product>-<os>
is needed to compile towards each GPU architecture. If no triple is specified then a safe but more limited default will be used. We don't support the full spectrum here1; the following table summarizes the currently recognized ones:
GPU Vendor Target Triple ARM Mali GPU e.g., valhall-g78-android30
Qualcomm Adreno GPU e.g., adreno-unknown-android30
AMD GPU e.g., rdna1-5700xt-linux
NVIDIA GPU e.g., ampere-rtx3080-windows
SwiftShader CPU cpu-swiftshader-unknown
","tags":["GPU","Vulkan"]},{"location":"guides/deployment-configurations/gpu-vulkan/#run-a-compiled-program","title":"Run a compiled program","text":"In the build directory, run the following command:
tools/iree-run-module \\\n--device=vulkan \\\n--module=mobilenet_vulkan.vmfb \\\n--function=predict \\\n--input=\"1x224x224x3xf32=0\"\n
The above assumes the exported function in the model is named as predict
and it expects one 224x224 RGB image. We are feeding in an image with all 0 values here for brevity, see iree-run-module --help
for the format to specify concrete values.
-
It's also impossible to capture all details of a Vulkan implementation with a target triple, given the allowed variances on extensions, properties, limits, etc. So the target triple is just an approximation for usage.\u00a0\u21a9
","tags":["GPU","Vulkan"]},{"location":"guides/ml-frameworks/","title":"ML frameworks","text":"IREE supports popular machine learning frameworks using the same underlying technology.
graph LR\n accTitle: ML framework to runtime deployment workflow overview\n accDescr {\n Programs start in some ML framework.\n Programs are imported into MLIR.\n The IREE compiler uses the imported MLIR.\n Compiled programs are used by the runtime.\n }\n\n A[ML frameworks]\n B[Imported MLIR]\n C[IREE compiler]\n D[Runtime deployment]\n\n A --> B\n B --> C\n C --> D
"},{"location":"guides/ml-frameworks/#supported-frameworks","title":"Supported frameworks","text":"See end-to-end examples of how to use each framework with IREE:
- TensorFlow and TensorFlow Lite
- JAX
- PyTorch
Importing from other frameworks is planned - stay tuned!
"},{"location":"guides/ml-frameworks/#samples","title":"Samples","text":"Check out the samples in IREE's samples/
directory, as well as the iree-samples repository.
"},{"location":"guides/ml-frameworks/#exportimport","title":"Export/Import","text":"Each machine learning framework has some \"export\" mechanism that snapshots the structure and data in your program. These exported programs can then be \"imported\" into IREE's compiler by using either a stable import format or one of IREE's importer tools.
This export/import process is specific to each frontend and typically involves a number of stages:
- Capture/trace/freeze the ML model into a graph
- Write that graph to an interchange format (e.g. SavedModel, TorchScript)
- Load the saved program into an import tool and convert to MLIR
- Legalize the graph's operations so only IREE-compatible operations remain
- Write the imported MLIR to a file
This fully imported form can then be compiled independently of the source language and framework.
"},{"location":"guides/ml-frameworks/#compilation","title":"Compilation","text":"IREE compiles MLIR files for specified sets of backends (CPU, GPU, etc). Each backend generates optimized native code custom to the input program and intended target platform. Once compiled, modules can be executed using IREE's runtime.
See the deployment configuration guides for details on selecting a compiler backend and tuning options for your choice of target platform(s) or device(s).
"},{"location":"guides/ml-frameworks/#execution","title":"Execution","text":"Compiled modules can be executed by selecting what compute devices to use, loading the module, and then executing it with the intended inputs. IREE provides several language bindings for its runtime API.
"},{"location":"guides/ml-frameworks/jax/","title":"JAX integration","text":"Note
IREE's JAX support is under active development. This page is still under construction.
","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/jax/#overview","title":"Overview","text":"IREE offers two ways to interface with JAX programs:
- An API for extracting and compiling full models ahead of time (AOT) for execution apart from JAX. This API is being developed in the iree-org/iree-jax repository.
- A PJRT plugin that adapts IREE as a native JAX backend for online / just in time (JIT) use. This plugin is being developed in the openxla/openxla-pjrt-plugin repository.
","tags":["Python","JAX"]},{"location":"guides/ml-frameworks/pytorch/","title":"PyTorch + IREE =","text":"Caution - under development
We are still validating and fixing specific models. Between bug fixes in flight and releases running behind, we don't expect that you will be able to do a lot of advanced things without using nightly releases or working with us.
Stay tuned and join the discussion in our Discord server's #pytorch
channel.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#overview","title":"Overview","text":"SHARK-Turbine offers a tight integration between compatible versions of IREE, torch-mlir, and PyTorch.
- Seamless integration with standard PyTorch workflows
- Deployment support for running PyTorch models on cloud and edge devices
- General purpose model compilation and execution tools
Both just-in-time (JIT) and ahead-of-time (AOT) workflows are supported:
graph LR\n accTitle: PyTorch integration overview\n accDescr {\n PyTorch programs can be optimized within a Python session with\n SHARK-Turbine's just-in-time tools.\n PyTorch programs can be exported out of Python to native binaries using\n SHARK-Turbine's ahead-of-time export toolkit.\n }\n\n subgraph Python\n pytorch(PyTorch)\n subgraph turbine [SHARK-Turbine]\n jit(\"Eager execution (JIT)\")\n aot(\"Export toolkit (AOT)\")\n end\n\n pytorch --> jit\n jit --> pytorch\n pytorch --> aot\n end\n\n subgraph Native\n binary([\"binary (.vmfb)\"])\n end\n\n aot -.-> binary
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#prerequisites","title":"Prerequisites","text":"Install Turbine and its requirements:
python -m pip install shark-turbine\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#just-in-time-jit-execution","title":"Just-in-time (JIT) execution","text":"Just-in-time integration allows for Python code using TorchDynamo to optimize PyTorch models/functions using IREE, all within an interactive Python session.
graph TD\n accTitle: PyTorch JIT workflow overview\n accDescr {\n Programs start as either PyTorch nn.Module objects or callable functions.\n Programs are compiled into optimized modules using torch.compile.\n Within torch.compile, Dynamo runs the program through Turbine and IREE.\n }\n\n subgraph Python\n input([nn.Module / function])\n\n subgraph compile [\"torch.compile()\"]\n direction LR\n dynamo{{TorchDynamo}}\n turbine{{SHARK-Turbine}}\n iree{{IREE}}\n dynamo --> turbine --> iree\n end\n\n output([Optimized module])\n input --> compile --> output\n end
For deployment outside of Python, see the ahead-of-time sections below.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#quickstart","title":"Quickstart","text":"Turbine integrates into PyTorch as a custom backend for torch.compile
.
Behind the scenes, PyTorch captures the structure of the input model into a computation graph and feeds that graph through to the selected backend compiler.
import torch\n\n# Define the `nn.Module` or Python function to run.\nclass LinearModule(torch.nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n def forward(self, input):\n return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Compile the program using the turbine backend.(1)\nopt_linear_module = torch.compile(linear_module, backend=\"turbine_cpu\")\n\n# Use the compiled program as you would the original program.\nargs = torch.randn(4)\nturbine_output = opt_linear_module(args)\n
- Initial integration only supports CPU, but support for many of IREE's other targets is coming soon.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples","title":"Samples","text":"Code samples JIT compilation notebook Simple MLP eager examples/eager_mlp/mlp_eager_simple.py
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#ahead-of-time-aot-export","title":"Ahead-of-time (AOT) export","text":"The ahead-of-time toolkit allows developers to define a program's structure in Python and then export deployment-ready artifacts that can be used in IREE's deployment configurations via the API bindings.
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#simple-api","title":"Simple API","text":"For simple models, a one-shot export API is available.
graph LR\n accTitle: PyTorch simple AOT workflow overview\n accDescr {\n Programs start as PyTorch nn.Module objects.\n Modules are exported using the \"aot\" API.\n Exported outputs are then compiled to .vmfb files with executable binaries.\n }\n\n subgraph Python\n input([nn.Module])\n export([\"ExportOutput (MLIR)\"])\n input -- \"aot.export()\" --> export\n end\n\n subgraph Native\n binary([\"binary (.vmfb)\"])\n end\n\n export -. \"compile()\" .-> binary
import iree.runtime as ireert\nimport numpy as np\nimport shark_turbine.aot as aot\nimport torch\n\n# Define the `nn.Module` to export.\nclass LinearModule(torch.nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.weight = torch.nn.Parameter(torch.randn(in_features, out_features))\n self.bias = torch.nn.Parameter(torch.randn(out_features))\n\n def forward(self, input):\n return (input @ self.weight) + self.bias\n\nlinear_module = LinearModule(4, 3)\n\n# Export the program using the simple API.\nexample_arg = torch.randn(4)\nexport_output = aot.export(linear_module, example_arg)\n\n# Compile to a deployable artifact.\nbinary = export_output.compile(save_to=None)\n\n# Use the IREE runtime API to test the compiled program.\nconfig = ireert.Config(\"local-task\")\nvm_module = ireert.load_vm_module(\n ireert.VmModule.wrap_buffer(config.vm_instance, binary.map_memory()),\n config,\n)\ninput = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)\nresult = vm_module.main(input)\nprint(result.to_host())\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_1","title":"Samples","text":"Code samples Simple AOT export notebook Simple MLP export examples/aot_mlp/mlp_export_simple.py
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#advanced-api","title":"Advanced API","text":"For more complex models, an underlying advanced API is available that gives access to more features.
graph LR\n accTitle: PyTorch advanced AOT workflow overview\n accDescr {\n Programs are represented using the aot.CompiledModule class.\n CompiledModules can extend nn.Module objects, export globals, and set\n shapes and dtypes for each function.\n Modules are exported using the \"aot\" API.\n Exported outputs are then compiled to .vmfb files with executable binaries.\n }\n\n subgraph Python\n compiledmodule(\"aot.CompiledModule\\n\\n- extend nn.Module\\n- export globals\\n- set shapes/dtypes\")\n export([\"ExportOutput (MLIR)\"])\n compiledmodule -- \"aot.export()\" --> export\n end\n\n subgraph Native\n binary([\"binary (.vmfb)\"])\n end\n\n export -. \"compile()\" .-> binary
Advanced export workflows can use the aot.CompiledModule
class to define and constrain the structure of a program prior to compiling it.
import shark_turbine.aot as aot\n\n# A minimal program, with no functions or variables.\nclass BasicModule(aot.CompiledModule):\n ...\n\n# Create an instance of the program and convert it to MLIR.\nfrom iree.compiler.ir import Context\ninstance = BasicModule(context=Context())\nmodule_str = str(aot.CompiledModule.get_mlir_module(instance))\n\nprint(module_str)\n# module @basic {\n# }\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#exporting-functions","title":"Exporting functions","text":"Exported functions are the API entry points into a compiled program.
Simple feed-forward neural networks used for inference may have a single exported function (typically called \"forward\"), while more complex programs can have multiple computation functions, initialization functions, \"backward\" methods for training, state management functions, debugging functions, etc.
-
Each instance method on a aot.CompiledModule
-derived class is exported. These instance methods can include calls to other aot
components, such as aot.jittable
compute functions:
class GetOnesModule(aot.CompiledModule):\n @aot.jittable\n def compute_ones():\n return torch.ones(3)\n\n def get_ones(self):\n return self.compute_ones()\n
-
Instance methods can use aot.AbstractTensor
to specify data types:
class IntSumModule(aot.CompiledModule):\n @aot.jittable\n def compute_sum(a, b):\n return a + b\n\n def sum_int32(\n self,\na=aot.AbstractTensor(2, dtype=torch.int32),\nb=aot.AbstractTensor(2, dtype=torch.int32),\n):\n return self.compute_sum(a, b)\n
-
Shapes can be made dynamic using aot.AbstractTensor
and aot.jittable
constraints:
class DynamicSumModule(aot.CompiledModule):\n @aot.jittable\n def compute_sum(a, b):\n return a + b\n\n def sum_dynamic(\n self,\na=aot.AbstractTensor(None),\nb=aot.AbstractTensor(None),\n):\n return self.compute_sum(\n a,\n b,\nconstraints=[\na.dynamic_dim(0) == b.dynamic_dim(0),\n],\n)\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#global-variables","title":"Global variables","text":"Global variables are used to represent persistent state within a program instance.
For example, they can be used to represent the weights and biases in a neural network, and exporting these as mutable variables can allow for setting their values independently at runtime.
-
Individual globals can be exported using aot.export_global()
:
state_example = torch.tensor(0, dtype=torch.int32)\nupdate_example = torch.tensor(0, dtype=torch.int32)\n\nclass SampleModule(aot.CompiledModule):\n value = aot.export_global(state_example, mutable=True)\n\n def get_value(self):\n return self.value\n\n def update_value(self, new_value=aot.abstractify(update_example)):\n self.value = new_value\n
-
All named parameters on a nn.Module
can be exported using export_parameters()
:
class SimpleParams(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.classifier = torch.nn.Linear(20, 30)\n\n def forward(self, x):\n return self.classifier(x)\n\nm = SimpleParams()\n\nclass SimpleParamsModule(aot.CompiledModule):\nparams = aot.export_parameters(m)\ncompute = aot.jittable(m.forward)\n\n def run(self, x=aot.AbstractTensor(128, 20)):\n return self.compute(x)\n\n# torch.nn.Linear has 'weight' and 'bias' variables:\n# https://pytorch.org/docs/stable/generated/torch.nn.Linear.html\n# Add getters for both exported parameters.\ndef get_weight(self):\nreturn self.params[\"classifier.weight\"]\ndef get_bias(self):\nreturn self.params[\"classifier.bias\"]\n
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#samples_2","title":"Samples","text":"Code samples Advanced AOT export notebook PyTorch dynamic shapes notebook Unit tests tests/aot/
Dynamic MLP export examples/aot_mlp/mlp_export_dynamic.py
llama2 inference example examples/llama2_inference/stateless_llama.py
","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#alternate-workflows","title":"Alternate workflows","text":"Caution - These are due for migration to SHARK-Turbine.
Code samples (Deprecated) Inference on BERT","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/pytorch/#native-on-device-training","title":"Native / on-device training","text":"A small (~100-250KB), self-contained binary can be built for deploying to resource-constrained environments without a Python interpreter.
Example scripts Basic Inference and Training Example Native On-device Training Example","tags":["Python","PyTorch"]},{"location":"guides/ml-frameworks/tensorflow/","title":"TensorFlow integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#overview","title":"Overview","text":"IREE supports compiling and running TensorFlow programs represented as tf.Module
classes or stored in the SavedModel
format.
graph LR\n accTitle: TensorFlow to runtime deployment workflow overview\n accDescr {\n Programs start as either TensorFlow SavedModel or tf.Module programs.\n Programs are imported into MLIR as StableHLO.\n The IREE compiler uses the imported MLIR.\n Compiled programs are used by the runtime.\n }\n\n subgraph A[TensorFlow]\n direction TB\n A1[SavedModel]\n A2[tf.Module]\n\n A1 --- A2\n end\n\n subgraph B[MLIR]\n B1[StableHLO]\n end\n\n C[IREE compiler]\n D[Runtime deployment]\n\n A -- iree-import-tf --> B\n B --> C\n C --> D
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#prerequisites","title":"Prerequisites","text":" -
Install TensorFlow by following the official documentation:
python -m pip install tf-nightly\n
-
Install IREE packages, either by building from source or from pip:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install \\\niree-compiler \\\niree-runtime \\\niree-tools-tf\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade \\\niree-compiler \\\niree-runtime \\\niree-tools-tf\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#importing-models","title":"Importing models","text":"IREE compilers transform a model into its final deployable format in several sequential steps. The first step for a TensorFlow model is to use either the iree-import-tf
command-line tool or IREE's Python APIs to import the model into a format (i.e., MLIR) compatible with the generic IREE compilers.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#from-savedmodel-on-tensorflow-hub","title":"From SavedModel on TensorFlow Hub","text":"IREE supports importing and using SavedModels from TensorFlow Hub.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#using-the-command-line-tool","title":"Using the command-line tool","text":"First download the SavedModel and load it to get the serving signature, which is used as the entry point for IREE compilation flow:
import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\nprint(list(loaded_model.signatures.keys()))\n
Note
If there are no serving signatures in the original SavedModel, you may add them yourself by following \"Missing serving signature in SavedModel\".
Then you can import the model with iree-import-tf
. You can read the options supported via iree-import-tf -help
. Using MobileNet v2 as an example and assuming the serving signature is predict
:
iree-import-tf \\\n --tf-import-type=savedmodel_v1 \\\n--tf-savedmodel-exported-names=predict \\\n/path/to/savedmodel -o iree_input.mlir\n
Tip
iree-import-tf
is installed as /path/to/python/site-packages/iree/tools/tf/iree-import-tf
. You can find out the full path to the site-packages
directory via the python -m site
command.
Tip
-tf-import-type
needs to match the SavedModel version. You can try both v1 and v2 if you see one of them gives an empty dump.
Next, you can compile the model in iree_input.mlir
for one of IREE's supported targets by following one of the deployment configuration guides.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#samples","title":"Samples","text":"Colab notebooks Training an MNIST digits classifier Edge detection Pretrained ResNet50 inference TensorFlow Hub import End-to-end execution tests can be found in IREE's integrations/tensorflow/e2e/ directory.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#troubleshooting","title":"Troubleshooting","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tensorflow/#missing-serving-signature-in-savedmodel","title":"Missing serving signature in SavedModel","text":"Sometimes SavedModels are exported without explicit serving signatures. This happens by default for TensorFlow Hub SavedModels. However, serving signatures are required as entry points for IREE compilation flow. You can use Python to load and re-export the SavedModel to give it serving signatures. For example, for MobileNet v2, assuming we want the serving signature to be predict
and operating on a 224x224 RGB image:
import tensorflow.compat.v2 as tf\nloaded_model = tf.saved_model.load('/path/to/downloaded/model/')\ncall = loaded_model.__call__.get_concrete_function(\n tf.TensorSpec([1, 224, 224, 3], tf.float32))\nsignatures = {'predict': call}\ntf.saved_model.save(loaded_model,\n '/path/to/resaved/model/', signatures=signatures)\n
The above will create a new SavedModel with a serving signature, predict
, and save it to /path/to/resaved/model/
.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/","title":"TensorFlow Lite integration","text":"","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#overview","title":"Overview","text":"IREE supports compiling and running TensorFlow Lite (TFLite) programs stored as TFLite FlatBuffers. These files can be imported into an IREE-compatible format then compiled to a series of backends.
graph LR\n accTitle: TFLite to runtime deployment workflow overview\n accDescr {\n Programs start as TensorFlow Lite FlatBuffers.\n Programs are imported into MLIR's TOSA dialect using iree-import-tflite.\n The IREE compiler uses the imported MLIR.\n Compiled programs are used by the runtime.\n }\n\n subgraph A[TFLite]\n A1[FlatBuffer]\n end\n\n subgraph B[MLIR]\n B1[TOSA]\n end\n\n C[IREE compiler]\n D[Runtime deployment]\n\n A -- iree-import-tflite --> B\n B --> C\n C --> D
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#prerequisites","title":"Prerequisites","text":" -
Install TensorFlow by following the official documentation:
python -m pip install tf-nightly\n
-
Install IREE packages, either by building from source or from pip:
Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install \\\niree-compiler \\\niree-runtime \\\niree-tools-tflite\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade \\\niree-compiler \\\niree-runtime \\\niree-tools-tflite\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#importing-and-compiling","title":"Importing and Compiling","text":"IREE's tooling is divided into two components: import and compilation.
- The import tool converts the TFLite FlatBuffer to an IREE compatible form, validating that only IREE compatible operations remain, containing a combination of TOSA and IREE operations.
- The compilation stage generates the bytecode module for a list of targets, which can be executed by IREE.
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-command-line-tools","title":"Using Command Line Tools","text":"These two stages can be completed entirely via the command line.
WORKDIR=\"/tmp/workdir\"\nTFLITE_URL=\"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\nTFLITE_PATH=${WORKDIR}/model.tflite\nIMPORT_PATH=${WORKDIR}/tosa.mlir\nMODULE_PATH=${WORKDIR}/module.vmfb\n\n# Fetch the sample model\nwget ${TFLITE_URL} -O ${TFLITE_PATH}\n\n# Import the sample model to an IREE compatible form\niree-import-tflite ${TFLITE_PATH} -o ${IMPORT_PATH}\n\n# Compile for the CPU backend\niree-compile \\\n--iree-input-type=tosa \\\n--iree-hal-target-backends=llvm-cpu \\\n${IMPORT_PATH} \\\n-o ${MODULE_PATH}\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#using-the-python-api","title":"Using the Python API","text":"The example below demonstrates downloading, compiling, and executing a TFLite model using the Python API. This includes some initial setup to declare global variables, download the sample module, and download the sample inputs.
Declaration of absolute paths for the sample repo and import all required libraries. The default setup uses the CPU backend as the only target. This can be reconfigured to select alternative targets.
import iree.compiler.tflite as iree_tflite_compile\nimport iree.runtime as iree_rt\nimport numpy\nimport os\nimport urllib.request\n\nfrom PIL import Image\n\nworkdir = \"/tmp/workdir\"\nos.makedirs(workdir, exist_ok=True)\n\ntfliteFile = \"/\".join([workdir, \"model.tflite\"])\njpgFile = \"/\".join([workdir, \"input.jpg\"])\ntfliteIR = \"/\".join([workdir, \"tflite.mlir\"])\ntosaIR = \"/\".join([workdir, \"tosa.mlir\"])\nbytecodeModule = \"/\".join([workdir, \"iree.vmfb\"])\n\nbackends = [\"llvm-cpu\"]\nconfig = \"local-task\"\n
The TFLite sample model and input are downloaded locally.
tfliteUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8.tflite\"\njpgUrl = \"https://storage.googleapis.com/iree-model-artifacts/tflite-integration-tests/posenet_i8_input.jpg\"\n\nurllib.request.urlretrieve(tfliteUrl, tfliteFile)\nurllib.request.urlretrieve(jpgUrl, jpgFile)\n
Once downloaded we can compile the model for the selected backends. Both the TFLite and TOSA representations of the model are saved for debugging purposes. This is optional and can be omitted.
iree_tflite_compile.compile_file(\n tfliteFile,\n input_type=\"tosa\",\n output_file=bytecodeModule,\n save_temp_tfl_input=tfliteIR,\n save_temp_iree_input=tosaIR,\n target_backends=backends,\n import_only=False)\n
After compilation is completed we configure the VmModule using the local-task configuration and compiled IREE module.
config = iree_rt.Config(\"local-task\")\ncontext = iree_rt.SystemContext(config=config)\nwith open(bytecodeModule, 'rb') as f:\n vm_module = iree_rt.VmModule.from_flatbuffer(config.vm_instance, f.read())\n context.add_vm_module(vm_module)\n
Finally, the IREE module is loaded and ready for execution. Here we load the sample image, manipulate it to the expected input size, and execute the module. By default TFLite models include a single function named 'main'. The final results are printed.
im = numpy.array(Image.open(jpgFile).resize((192, 192))).reshape((1, 192, 192, 3))\nargs = [im]\n\ninvoke = context.modules.module[\"main\"]\niree_results = invoke(*args)\nprint(iree_results)\n
","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#samples","title":"Samples","text":" -
The tflitehub folder in the iree-samples repository contains test scripts to compile, run, and compare various TensorFlow Lite models sourced from TensorFlow Hub.
-
An example smoke test of the TensorFlow Lite C API is available here.
Colab notebooks Text classification with TFLite and IREE","tags":["Python","TensorFlow"]},{"location":"guides/ml-frameworks/tflite/#troubleshooting","title":"Troubleshooting","text":"Failures during the import step usually indicate a failure to lower from TensorFlow Lite's operations to TOSA, the intermediate representation used by IREE. Many TensorFlow Lite operations are not fully supported, particularly those that use dynamic shapes. Please reach out on one of IREE's communication channels if you notice something missing.
","tags":["Python","TensorFlow"]},{"location":"reference/","title":"Reference pages","text":""},{"location":"reference/#api-bindings","title":"API bindings","text":"IREE offers API bindings for compiling and running programs from various languages.
- Index page
"},{"location":"reference/#mlir-dialects","title":"MLIR dialects","text":"Automatically generated documentation for the MLIR dialects defined in the IREE repository.
- Index page
"},{"location":"reference/#other-topics","title":"Other topics","text":" - Glossary
- Optimization options
- Extensions
"},{"location":"reference/extensions/","title":"Extension mechanisms","text":"Note
Much of this describes provisions for extension within IREE but until the core of the system has settled little work will be done to fully flesh-out and document them in detail. A large majority of things that would make someone want to extend IREE can instead be accomplished much easier and performantly using native MLIR dialects that are then processed by the IREE compiler.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#guidelines","title":"Guidelines","text":"IREE has a compiler and runtime separation, a multi-layered architecture, and split between execution of \"host code\" that schedules compute-heavy work and SPMD \"device code\" that performs the bulk of compute operations. Each axis has a different set of extension mechanisms that can be used independently or combined.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#extension-philosophy","title":"Extension philosophy","text":"Organized below are some of the mechanisms IREE provides for extending the core compiler and runtime and when they should(n't) be used. The goal of these progressively lower-level extension mechanisms is to make it easier for users to fall into the pit of success:
Quote
\"a well-designed system makes it easy to do the right things and annoying (but not impossible) to do the wrong things.\" - Jeff Atwood
The amount of engineering complexity for initial bring-up and maintenance increases with each subsequently lower-level approach and it is best to start from the top and exit as fast as possible: this is a choose-your-own-adventure where you're trying to escape the dungeon with both the loot and your limbs . Avoid the temptation of immediately dropping down to making external C calls at runtime because that's how it's been done before as it's easier, more robust, and more performant to use the system as it is intended to be used.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-extend","title":"When to extend","text":"The primary goal when extending any framework should first be to avoid extending it at all. There is no mechanism that is free - whether in terms of engineering effort to develop and maintain over time, include in compiler deployments, or include in runtime deployments. As a system scales in deployment configurations the available mechanisms for extension increase but so too does the chaos introduced by extensions that do not also scale with that design. Users are the only ones who can determine the tradeoffs they are willing to accept: for example, the mechanism to extend device code with a custom runtime call to a C function does not work on GPUs and gets significantly more complicated on CPUs as sandboxes/enclaves are used - but if the user scenario is for local process CPU-only execution that may not matter.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-inputscompilerruntime","title":"Where to extend (inputs/compiler/runtime)","text":"Consider in normal software development when one would choose to write more code (possibly packaging it into a reusable library) vs. changing the programming language or compiler they are using to compile their code vs. changing the operating systems their code runs on. The further one gets from the problem they are trying to solve the more work, coordination, and maintenance is involved and though there are reasons to make changes across the stack they should be done only when a simpler solution would not suffice.
An author will retain more control over their logic the closer they sit to the inputs to the compiler. IREE provides several mechanisms that try to keep control with the author and robust to changes in IREE or MLIR internals and it is strongly encouraged that those looking to extend take those routes first. Contributions that help everyone are very welcome but do have a higher cost and it's often much easier to design and justify upstream changes with working examples in forks or at higher levels of the stack.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#where-to-extend-hostdevice","title":"Where to extend (host/device)","text":"From a performance perspective the rule is to colocate code with the data it is acting on: tensor data, for example, should almost exclusively be manipulated by device code as tensors live on device. Attempting to use tensor data with host code will result in synchronization points and host/device transfers that can decimate performance. This can lead to seemingly paradoxical situations where swapping out compiler-generated code for a human-authored \"fast path\" can be slower than even the most naive compiler results. An important thing to keep in mind with compilers is that it is exceedingly difficult to produce code by hand that is consistently more performant across a broad range of deployments and the first temptation should always be to improve the compiler - extending it via other mechanisms when not required by the task is often just premature optimization.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#1-target-iree-input-dialects","title":"1. Target IREE input dialects","text":"TL;DR
Convert your custom ops into standard MLIR dialects.
+------------+ +--------+ +---------------+\n| Your input | -+-> | iree | -+-> | IREE compiler |\n+------------+ | +--------+ | +---------------+\n | +--------+ |\n +-> | linalg | -+\n | +--------+ |\n | .... |\n
The easiest, cleanest, and most robust path to extend IREE is to make use of what MLIR is designed for: composing dialects and converting between them. IREE supports several input dialects such as tosa
, mhlo
, linalg
, and the standard arith
, math
, tensor
, and scf
dialects. Any source IR that can be turned into that mix of dialects (directly or transitively) will work with the whole IREE pipeline for all deployment configurations and targets. If possible to express the computation in this form it will always be the best route to getting small deployments without the need to modify or include any additional code at runtime and run on all device types and execution modes.
This mechanism can also be layered with any of the subsequent lower-level ones: if some part of the operation runs on the host and some part on device then decomposing it such that it contains as many standard ops for flow control as possible and linear algebra/custom ops for the dense math will reduce the engineering effort required on both sides and lead to an easier to maintain solution even if lower-level extension is required.
A large majority of classic ML \"custom ops\" can be accomplished with this approach. When bringing up projects built on IREE it's best to concisely describe the operation in more elemental mathematical representations and then add optimizations where required knowing that things will still work even if those optimizations never happen.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros","title":"Pros","text":" - No IREE compiler or runtime code changes required.
- Can use standard IREE packaged releases and tools.
- No versioning issues at runtime.
- IREE's host/device partitioning can partition your code.
- Fusion and other compiler techniques (CSE/DCE/inlining/etc) work on your code.
- All target backends (CPU/GPU/accelerators/enclaves/etc) work.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons","title":"Cons","text":" - Input dialects cannot natively represent all possible programs (such as file IO and other syscalls).
- Performance-sensitive host code (b-trees and other in-memory databases) will run through the slower VM paths if not authored as dense compute.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use","title":"When to use","text":" - Targeting multiple MLIR toolchains of which IREE is just one (as little to no IREE-specific code is required).
- Operation represents host code in addition to device code.
- All code is known statically or symbolically at compile-time (instead of independently versioned libraries at runtime).
- Complex high-performance code not representable as linear algebra.
- External runtime interactions (file/network/user IO). Use custom modules.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation","title":"Implementation","text":"To make use of this approach one just needs to follow the standard MLIR dialect conversion behavior: add a dialect with ops, add a conversion pass, and run that pass before providing the resulting IR to the IREE compiler. See Creating a Dialect.
Think of this like authoring C++ sources with templates that you compile into your application: Clang (and LLVM beyond) don't know about your library details and instead just process it as it would any other code. You can take the same source and pass it to GCC and it'll be robust to underlying changes in the system.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#2-extend-host-code-with-custom-modules","title":"2. Extend host code with custom modules","text":"TL;DR
Import MLIR functions in the compiler and custom modules at runtime.
// Main user module compiled by IREE:\nmodule @model {\n // Declare a synchronous external function:\n func.func private @my_custom_module.sync_func(%input: tensor<?xf32>) -> i32\n // Declare an asynchronous external function:\n func.func private @my_custom_module.async_func(%input: tensor<?xf32>) -> tensor<?xf32> attributes {\n iree.abi.model = \"coarse-fences\",\n nosideeffects\n }\n func.func @predict() {\n ...\n // Call a synchronous/blocking external function:\n %sync_result = call @my_custom_module.sync_func(%sync_input) : (tensor<?xf32>) -> i32\n ...\n ...\n // Call an asynchronous/non-blocking external function:\n %async_result = call @my_custom_module.async_func(%async_input) : (tensor<?xf32>) -> tensor<?xf32>\n ...\n }\n}\n
IREE provides dynamic linking at runtime via its VM interfaces. For code that runs on the host and requires syscalls or calling out to existing libraries - such as file IO, text processing, and JPEG decoding - this is an easy way to interop without paying attention to the more complex details of device code. An IREE module compiled using custom modules is portable and dynamically deployable so long as the custom module is registered at runtime.
This approach conceptually matches what normal native binaries do in an OS: imports are declared and at runtime they are resolved based on the available exports of modules in the system. Just as with normal systems engineering, the design of the API between modules is up to the user and, depending on rigor, can have several pitfalls, but these problems and their solutions are not IREE-specific, and anyone who has designed a shared library interface can apply the same rules here in IREE around versioning, performance, etc. One does not add 2 integers via a syscall and the same holds here: custom modules and the functions within should perform a large amount of work to hide overheads involved in the cross-module calls and users must be aware that the compiler cannot optimize across the call boundaries.
See the synchronous tensor I/O and asynchronous tensor I/O samples.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_1","title":"Pros","text":" - No IREE compiler code changes required.
- Produced artifacts are portable across IREE deployment configurations.
- Full system access is allowed - the VM just calls external functions.
- Runtime modules can be implemented (via shims) in other languages/runtimes.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_1","title":"Cons","text":" - Custom modules must be registered at runtime by the user.
- The VM custom module ABI goo must be authored by the user (such as with JNI or pybind to move between java/python and C).
- All custom module code must be compiled and deployed regardless of how much any modules use. The granularity of modules and their versioning is up to the user.
- Custom module code cannot be optimized by the IREE compiler to avoid host/device readbacks and unnecessary data type conversion.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_1","title":"When to use","text":" - Interactions with large libraries or system calls.
- Performance-sensitive host code that cannot easily be represented as device code (like UTF-8 string transformation using libicu).
- Extensively using tensor resources.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_1","title":"Implementation","text":"The runtime portion requires that the code be exported to the VM system by way of an iree_vm_module_t
interface. A low-level native interface exists with minimal overhead and is used for example by the IREE HAL itself. There is also a C++ wrapper that is significantly easier to work with; however, it needs some performance improvements.
Full end-to-end examples can be found under samples/custom_modules/
:
- The basic sample shows how to add VM modules with custom types and take advantage of ABI features like fallback functions and optional imports.
- The synchronous tensor I/O sample shows a call taking and returning a tensor and performing blocking work.
- The asynchronous tensor I/O sample shows the same thing but with fences for asynchronous scheduling.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#3-extend-target-specific-device-conversion-patterns","title":"3. Extend target-specific device conversion patterns","text":"TL;DR
Add patterns to iree/Compiler/Codegen/
to emit target code.
The easiest and most robust path for specializations of device code is to emit such code mixed with the IREE compiler generated code at the highest possible level of abstraction within the target pipeline. For example, if the code can be represented with the vector
dialect then inserting conversion patterns between linalg
and vector
enables the emitted code to be specialized further based on user configuration and optimized with the full set of available passes that run in the pipeline. For each level lower one goes the more flexibility they gain such as being able to emit inline assembly blocks that do anything while trading off generality and multi-targeting applicability.
How much the tradeoff matters is based on the behavior of the extension. If a pattern changing a transcendental function to an approximation can operate at the vector level then all IREE deployment targets can benefit from the pattern and as new targets are made available they will automatically receive the benefits. In contrast, a pattern at the vector level that turns generic vector operations into architecture-specific LLVM intrinsics by its nature only pertains to a single target family and can be done at a lower level. As a rule of thumb if a particular pattern is going to need ~N implementations for ~N targets that are all mostly the same it's better to try to move that higher in the stack.
At this point the complexity of extending things is still fairly constrained: a C++ pass or pattern is verified with normal lit tests and can be upstreamed easily either into MLIR or IREE (a large number of IREE patterns are upstreamed, benefiting all users of MLIR). Cross-compilation and versioning are not a factor and the IREE artifacts can be considered durable at a coarse level (outside of major target architectural changes).
Note that depending on the target there are various mechanisms for representing code in MLIR, up to including inline assembly snippets in IR via llvm.inline_asm
.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_2","title":"Pros","text":" - Not limited to what is possible to represent in any particular MLIR dialect.
- Rich target configuration available; multiple passes can contribute info.
- Produced executable binaries are hermetic and no runtime changes are required.
- Specialization can happen in MLIR dialects like
linalg
or vector
as well as target-specific representations like SPIR-V and LLVM IR. - The compiler can perform deep optimizations across both the generated code and the provided code (hoisting/loop invariant code motion/cse/etc).
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_2","title":"Cons","text":" - Requires implementing the patterns as code in the IREE compiler or via TBD interfaces.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_2","title":"When to use","text":" - Code that must be emitted during target lowering - such as something optimizing for a particular CPU architecture.
- Hot code mixed with generated code at a fine granularity (within the innermost loop).
- External existing hand-authored libraries. Either statically or dynamically link instead.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_2","title":"Implementation","text":"There are several ways to author patterns and passes in MLIR. As examples:
- A majority of patterns are authored in C++ using PatternRewriter.
- PDL is an MLIR-based way to express rewrite operations with strong typing, compile-time verification, and easily-readable and less-verbose IR.
linalg
uses a python-based DSL for defining some of its extended ops.
There are many examples within both MLIR and IREE, one specifically being the polynomial approximation expansion patterns.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#4-include-external-target-specific-device-code","title":"4. Include external target-specific device code","text":"TL;DR
Statically link external object files into IREE executables.
For large bodies of existing device code or library calls that are available for static linkage the work involved to reimplement them at higher levels of the stack can be cost prohibitive even if it leads to better results. In these cases just as with a normal toolchain one would just want to declare an external function, call it, and add the object file to the linker command line. In IREE the same can be performed by way of taking compatible bitcode or native object files and linking them in with the generated code. An MLIR pattern would declare and emit the call and the target-specific IREE linker would pull in the objects.
As the linking behavior varies per target (for example, some targets like SPIR-V don't have traditional linkers) how this is performed is up to the IREE target backends. The complexity involved in producing the object files to link will also vary per-backend and the complexity of the deployment: cross-compiling for multiple architectures or compilation modes (ASAN, etc) will require unique copies of the object files matching that precise configuration.
At this point generality is largely out as is the ability to cleanly upstream such files. It should be apparent how a few dozen lines of C++ or PDL that avoids the need for any of this complexity is more appealing. In extremely specific cases of a single platform/architecture/version for a single program deployed via a specific artifact composition it's not so bad but IREE is designed such that extreme specificity is an optional mode of the more general solution. This does not mean this mechanism is not useful in some situations and only that it should be a last-resort when one of the easier to manage solutions is not viable - not a shortcut to avoid writing some C++ patterns.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_3","title":"Pros","text":" - Works with hand-authored code in compatible object files from any toolchain.
- No IREE runtime changes required.
- All deployment modes still work, including multi-targeting.
- No versioning concerns as custom code is included in artifacts.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_3","title":"Cons","text":" - Users must provide per-target precompiled object files on disk.
- IREE compiler changes are still needed for generating the external calls.
- Though LTO may be able to optimize across the calls it is not guaranteed.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_3","title":"When to use","text":" - Existing math libraries or architecture-specific functions that cannot be ported into a more MLIR-friendly form.
- Mixing in hand-authored code written in C/rust/etc with generated code from MLIR.
- External code can be represented as either
linalg
, vector
, or LLVM IR. Use target-specific conversion patterns instead. - External code size is large and unlikely to benefit from link-time optimizations (such as something like libjpeg). Dynamically link instead.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_3","title":"Implementation","text":"As the linking behavior varies per target backend there is no general solution at this level: if targeting the CPU then the system native linker or lld need to be provided the object files, while SPIR-V will need to merge the SPIR-V binaries directly, and Metal shader libraries will need to be constructed with the Apple-specific metallib
tooling. Producing these files and performing the linking is outside the scope of IREE.
If the files can be acquired then compiler changes will be required to emit calls to them and invoke the linker with the files.
On the CPU an alternative is to use the static library output mode where IREE produces an object file and then the user invokes the linker themselves; this still requires the compiler changes to emit the calls but avoids needing to teach the compiler how to link the files.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#5-dynamically-link-target-specific-device-code-cpu-only","title":"5. Dynamically link target-specific device code (CPU only)","text":"TL;DR
Dynamically link external C functions at runtime from device code.
It is pitch black. You are likely to be eaten by a grue.
This is the lowest-level integration in the system and is designed to act as an escape hatch and - as with any emergency escape hatch - it's not designed for ergonomics. Users should try first to come in through the door and attempting to use this mechanism should trigger alarms about the approach being attempted.
IREE's execution model for device code and native machine binary deployment mechanisms are designed with several constraints in order to make all of the above approaches possible and performant. Calling arbitrary C functions from deep within the system can introduce subtle (and not-so-subtle) bugs that are extremely difficult to track down and versioning between the compiler emitting the calls and the runtime providing the implementations can cause skew unless held carefully. Consider the methods added here like syscalls in that they must be extremely focused and if they are ever likely to change (including being removed) then care will be needed just as with versioning or redirecting a syscall. Designing good stable interfaces is hard and a classic pit of failure.
Some things to note:
- Device code executes in a tiled fashion and single dispatches may invoke the same function many times from many threads concurrently to perform the larger work.
- Tiles may execute in any order and on any thread; performing fine-grained locking within the tile can lead to deadlocks.
- Device code is stateless in order to allow for access restrictions and caching across multiple loaded models - any library state required must be externally managed via process globals.
- Device code may be running out-of-process (sandbox/enclave) and the library functions must be available where the dispatches run and not where they are launched (such as being linked into the sandbox binary, if separate from the main process binary).
- The stack must be used to pass arguments/results to external calls via a single pointer and there is no libffi-like functionality for magically calling arbitrary C functions. Users must provide the shims they need.
- Thread-local storage is unavailable in the called code (it may be usable, but it is not guaranteed it'll work on all platforms and leaks are likely).
- No heap allocator is provided and the use of libc malloc is unsupported.
Most of the constraints here come from the SPMD parallelism model, platform-agnostic deployment format, and overall data-oriented design of IREE. Code operating in this fashion has a certain shape and that is usually not the same as big legacy single-threaded CPU-focused BLAS libraries that perform their own caching, internal thread and state management, and other shenanigans. IREE is not designed to wrap such things and if any of these notes are issues it is more an indicator that the approach needs adjustment than anything else. Trying to bypass or workaround the constraints is possible - after all IREE is an open source project and any user is welcome to fork it - but unsupported by the core IREE team.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#pros_4","title":"Pros","text":" - Function resolution at runtime is orthogonal to compiler target specification.
- Machine code can be shared between the application and IREE artifacts.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#cons_4","title":"Cons","text":" - IREE compiler and runtime must both be modified.
- Deeper integration with the IREE codegen compiler infrastructure required.
- ABI versioning complexity between compiler and runtime.
- Runtimes must ship the imports for the lifetime of any artifact compiled to use them.
- Humans are bad at predicting the future.
- Using the same artifact in different binaries at runtime requires changes to each binary - including those that may not be owned by the person producing the artifact.
- Weak imports and conditional usage can help but still leads to bloat.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#when-to-use_4","title":"When to use","text":" - Calling into opaque closed-source BLAS-like microkernel libraries.
- Any other cases covered above can be used, especially microkernels that can be represented in MLIR or as statically linked libraries.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/extensions/#implementation_4","title":"Implementation","text":"The compiler is changed to produce calls to imports via a dynamic import table provided to each dispatch function. The import table is declared in the executable library for use at runtime. Runtime applications register an import provider to resolve named symbols in the import table to C functions that marshal arguments and results.
The compiler-side needs some additional work but an example is included here: Issue 7504. The runtime-side is complete and resolution is performed by a user-supplied iree_hal_executable_import_provider_t
.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/","title":"Glossary","text":"IREE exists in an ecosystem of projects and acts as a bridge between machine learning frameworks and a variety of hardware platforms. This glossary outlines some of those projects and technologies.
Something missing?
Don't see a project or technology here that you think should be? We welcome contributions on our GitHub page!
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#jax","title":"JAX","text":"JAX is Python framework supporting high-performance machine learning research by bridging automatic differentiation and ML compilers like XLA and IREE.
See the JAX Integration guide for details on how to use JAX programs with IREE.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#mlir","title":"MLIR","text":"Multi-Level Intermediate Representation (MLIR) is the compiler framework that IREE is built around. Beyond the tooling this includes a set of common dialects and transformations that IREE utilizes for its code generation system.
For general discussion on MLIR see the project's discourse forum.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#linalg","title":"Linalg","text":"Linalg is an MLIR dialect that defines Linear Algebra operations in a generalized fashion by modeling iteration spaces together with compute payloads. Linalg includes a set of commonly used operations as well as generic interfaces.
IREE uses the Linalg dialect during its code generation pipeline to define tensor operations then generate loop structures for its various backend targets.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#openxla","title":"OpenXLA","text":"OpenXLA is a community-driven, open source ML compiler ecosystem.
IREE is one project under the OpenXLA GitHub Organization, and it interfaces with many of the other projects, such as StableHLO.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#pytorch","title":"PyTorch","text":"PyTorch is an optimized tensor library for deep learning.
PyTorch uses the Torch-MLIR project to interface with projects like IREE. See the PyTorch Integration guide for details on how to use PyTorch programs with IREE.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#spir-v","title":"SPIR-V","text":"SPIR-V is a shader and kernel intermediate language for expressing parallel computation typically used for GPUs. It serves as a hardware agnostic assembly format for distributing complex, computationally intensive programs.
IREE uses the SPIR-V MLIR Dialect in its code generation pipeline for Vulkan and other compute APIs.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#stablehlo","title":"StableHLO","text":"StableHLO is a set of versioned high-level operations (HLOs) for ML models with backward and forward compatibility guarantees. StableHLO aims to improve interoperability between frameworks (such as TensorFlow, JAX, and PyTorch) and ML compilers.
StableHLO has both a specification and an MLIR dialect.
IREE uses the StableHLO MLIR Dialect as one of its input formats.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tosa","title":"TOSA","text":"Tensor Operator Set Architecture (TOSA) provides a set of tensor operations commonly employed by Deep Neural Networks. TOSA defines accuracy and compatibility constraints so frameworks that use it can trust that applications will produce similar results on a variety of hardware targets.
TOSA has both a specification and an MLIR dialect.
IREE uses the TOSA MLIR dialect as one of its input formats.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/glossary/#tflite","title":"TFLite","text":"TensorFlow Lite (TFLite) is a library for deploying models on mobile and other edge devices.
IREE supports running TFLite programs that have been imported into MLIR using the TOSA dialect. See the TFLite Integration guide for details on how to use TFLite programs with IREE.
IREE also has bindings for the TFLite C API, see the runtime/bindings/tflite/
directory for details.
","tags":["JAX","PyTorch","TensorFlow"]},{"location":"reference/optimization-options/","title":"Optimization options","text":"This page documents various supported flags for optimizing IREE programs. Each is presented with its English name, flag to enable/disable, and default state.
These flags can be passed to the:
iree-compile
command line tool extra_args=[\"--flag\"]
argument to iree.compiler.tools
Python wrappers - In-process Python compiler API
iree.compiler.transforms.iree-compile.CompilerOptions(\"--flag\", \"--flag2\")
constructor ireeCompilerOptionsSetFlags()
compiler C API function
"},{"location":"reference/optimization-options/#high-level-program-optimizations","title":"High level program optimizations","text":""},{"location":"reference/optimization-options/#constant-evaluation-iree-opt-const-eval-on","title":"Constant evaluation (--iree-opt-const-eval
(on))","text":"Performs compile-time evaluation of any global initializers which produce the initial values for global constants, storing the global directly in the program as constant data. This extracts such constant program fragments and recursively compiles them, using the runtime to evaluate the results.
Note that this only has any effect on computations in module initializer functions, not free-standing operations in the program which may produce constant-derived results. See --iree-opt-const-expr-hoisting
for options to optimize these.
"},{"location":"reference/optimization-options/#constant-expression-hoisting-iree-opt-const-expr-hoisting-off","title":"Constant expression hoisting (--iree-opt-const-expr-hoisting
(off))","text":"Identifies all trees of constant expressions in the program and uses a heuristic to determine which would be profitable to hoist into global initializers for evaluation at module load. Together with --iree-opt-const-eval
, this will convert eligible trees of expressions to purely static data embedded in the module.
The heuristic is currently relatively primitive, using static information to disable hoisting of leaf operations which are metadata only (i.e. broadcasts, etc) or are expected to fold away as part of operator fusion. Notably, the current heuristic is likely to pessimize module size in the case of complicated programs with trees of constant, large tensors.
"},{"location":"reference/optimization-options/#numeric-precision-reduction-iree-opt-numeric-precision-reduction-off","title":"Numeric precision reduction (--iree-opt-numeric-precision-reduction
(off))","text":"Analyzes program constant data and program flow to identify math operations which can be safely evaluated with reduced precision (currently with a minimum of 8bit integers but being extended to infer any bit depth) and inserts appropriate casts. In conjunction with Constant Expression Hoisting, Constant Evaluation and other automatic optimizations, this can produce programs where large amounts (up to the whole) have had their numeric operations and constant data rewritten to lower precision types.
This feature is actively evolving and will be the subject of dedicated documentation when ready.
"},{"location":"reference/optimization-options/#strip-debug-assertions-iree-opt-strip-assertions-off","title":"Strip Debug Assertions (--iree-opt-strip-assertions
(off))","text":"Strips all std.assert
ops in the input program after useful information for optimization analysis has been extracted. Assertions provide useful user-visible error messages but can prevent critical optimizations. Assertions are not, however, a substitution for control flow and frontends that want to check errors in optimized release builds should do so via actual code - similar to when one would if (foo) return false;
vs. assert(foo);
in a normal program.
"},{"location":"reference/bindings/","title":"API bindings","text":"API bindings allow for programmatic use of IREE's compiler and runtime components. The core IREE project is written in C1, allowing for API bindings to be written in a variety of other languages.
Something missing?
Want to use another language? Looking for something specific out of one of those already listed?
We welcome discussions on our communication channels and contributions on our GitHub page!
"},{"location":"reference/bindings/#official-api-bindings","title":"Official API bindings","text":"Members of the core project team and OpenXLA partners maintain these official bindings:
Language Compiler API? Runtime API? Published packages? C/C++ Supported Supported Unsupported Python Supported Supported Supported JavaScript Experimental Experimental Unsupported"},{"location":"reference/bindings/#cc","title":"C/C++","text":"See the C API reference page.
"},{"location":"reference/bindings/#python","title":"Python","text":"See the Python reference page.
"},{"location":"reference/bindings/#javascript","title":"JavaScript","text":" - JavaScript bindings for WebAssembly and WebGPU are under development in IREE's
experimental/web/
directory.
"},{"location":"reference/bindings/#unofficial-api-bindings","title":"Unofficial API bindings","text":"Members of our developer community have authored bindings using other languages:
Language Compiler API? Runtime API? Published packages? Julia Experimental Experimental Unsupported Rust Unsupported Experimental Experimental"},{"location":"reference/bindings/#julia","title":"Julia","text":" - Coil.jl is an experimental package to lower and execute Julia tensor operations to IREE.
"},{"location":"reference/bindings/#rust","title":"Rust","text":" - iree-rs is a crate containing rustic bindings for the IREE runtime.
-
with some C++ tools and utilities\u00a0\u21a9
"},{"location":"reference/bindings/c-api/","title":"C API bindings","text":""},{"location":"reference/bindings/c-api/#overview","title":"Overview","text":"The IREE compiler and IREE runtime both have their own C/C++ APIs. This page introduces the available APIs and describes how to use them from your applications.
Note
There are multiple ways to distribute and depend on C/C++ projects, each with varying levels of portability, flexibility, and toolchain compatibility. IREE aims to support common configurations and platforms.
"},{"location":"reference/bindings/c-api/#compiler-api","title":"Compiler API","text":"The IREE compiler is structured as a monolithic shared object with a dynamic plugin system allowing for extensions. The shared object exports symbols for versioned API functions.
graph TD\n accTitle: IREE compiler linkage model diagram\n accDescr {\n The libIREECompiler.so or IREECompiler.dll shared object contains pipelines,\n target backends, and general passes as private implementation details.\n Compiler plugins interface with the compiler shared object to extend it with\n custom targets, dialects, etc.\n Applications interface with the compiler shared object through the compiler\n C API's exported symbols.\n }\n\n subgraph compiler[libIREECompiler.so / IREECompiler.dll]\n pipelines(\"Pipelines\n\n \u2022 Flow\n \u2022 Stream\n \u2022 etc.\")\n\n targets(\"Target backends\n\n \u2022 llvm-cpu\n \u2022 vulkan-spirv\n \u2022 etc.\")\n\n passes(\"General passes\n\n \u2022 Const eval\n \u2022 DCE\n \u2022 etc.\")\n end\n\n plugins(\"Compiler plugins\n\n \u2022 Custom targets\n \u2022 Custom dialects\n \u2022 etc.\")\n\n application(Your application)\n\n compiler <-- \"Plugin API<br>(static or dynamic linking)\" --> plugins\n compiler -. \"Compiler C API<br>(exported symbols)\" .-> application
API definitions can be found in the following locations:
Source location Overview iree/compiler/embedding_api.h
Top-level IREE compiler embedding API iree/compiler/PluginAPI/
directory IREE compiler plugin API mlir/include/mlir-c/
directory MLIR C API headers"},{"location":"reference/bindings/c-api/#concepts","title":"Concepts","text":"The compiler API is centered around running pipelines to translate inputs to artifacts. These are modeled via sessions, invocations, sources, and outputs.
stateDiagram-v2\n accTitle: IREE compiler session and invocation state diagram\n accDescr {\n Input files are opened (or buffers are wrapped) as sources in a session.\n Sources are parsed into invocations, which run pipelines.\n Output files are written (or buffers are mapped) for compilation artifacts.\n Sessions can contain multiple sources and run multiple invocations.\n }\n\n direction LR\n InputFile --> Source1 : open file\n InputBuffer --> Source2 : wrap buffer\n\n state Session {\n Source1 --> Invocation1\n Source2 --> Invocation2\n Invocation1 --> Invocation1 : run pipeline\n Invocation2 --> Invocation2 : run pipeline\n }\n\n Invocation1 --> Output1File : write file\n Invocation1 --> Output1Buffer : map memory\n Invocation2 --> Output2Buffer : map memory
"},{"location":"reference/bindings/c-api/#sessions","title":"Sessions","text":"A session (iree_compiler_session_t
) is a scope where one or more invocations can run.
- Internally, sessions consist of an
MLIRContext
and a private set of options. - Sessions may activate available plugins based on their options.
"},{"location":"reference/bindings/c-api/#invocations","title":"Invocations","text":"An invocation (iree_compiler_invocation_t
) is a discrete run of the compiler.
- Invocations run pipelines, consisting of passes, to translate from sources to outputs.
"},{"location":"reference/bindings/c-api/#sources","title":"Sources","text":"A source (iree_compiler_source_t
) represents an input program, including operations and data.
- Sources may refer to files or buffers in memory.
"},{"location":"reference/bindings/c-api/#outputs","title":"Outputs","text":"An output (iree_compiler_output_t
) represents a compilation artifact.
- Outputs can be standalone files or more advanced streams.
"},{"location":"reference/bindings/c-api/#plugins","title":"Plugins","text":"A plugin extends the compiler with some combination of target backends, options, passes, or pipelines. For documentation on compiler plugins, see compiler/PluginAPI/README.md
.
"},{"location":"reference/bindings/c-api/#usage","title":"Usage","text":"This snippet shows the general layout of the API. For working examples, see the samples below.
To build a custom tool using the compiler API:
CMakeLists.txtset(_IREE_COMPILER_API \"${_IREE_COMPILER_ROOT}/bindings/c/iree/compiler\")\ntarget_include_directories(${_NAME} SYSTEM PRIVATE ${_IREE_COMPILER_API})\ntarget_link_libraries(${_NAME} iree_compiler_bindings_c_loader)\n
iree_compiler_demo.c#include <iree/compiler/embedding_api.h>\n#include <iree/compiler/loader.h>\n\nint main(int argc, char** argv) {\n// Load the compiler library then initialize it.\nireeCompilerLoadLibrary(\"libIREECompiler.so\");\nireeCompilerGlobalInitialize();\n\n// Create a session to track compiler state and set flags.\niree_compiler_session_t *session = ireeCompilerSessionCreate();\nireeCompilerSessionSetFlags(session, argc, argv);\n\n// Open a file as an input source to the compiler.\niree_compiler_source_t *source = NULL;\nireeCompilerSourceOpenFile(session, \"input.mlir\", &source);\n\n// Use an invocation to compile from the input source to one or more outputs.\niree_compiler_invocation_t *inv = ireeCompilerInvocationCreate(session);\nireeCompilerInvocationPipeline(inv, IREE_COMPILER_PIPELINE_STD);\n\n// Output the compiled artifact to a file.\niree_compiler_output_t *output = NULL;\nireeCompilerOutputOpenFile(\"output.vmfb\", &output);\nireeCompilerInvocationOutputVMBytecode(inv, output);\n\n// Cleanup state.\nireeCompilerInvocationDestroy(inv);\nireeCompilerOutputDestroy(output);\nireeCompilerSourceDestroy(source);\nireeCompilerSessionDestroy(session);\nireeCompilerGlobalShutdown();\n}\n
"},{"location":"reference/bindings/c-api/#samples","title":"Samples","text":"Project Source Description iree-org/iree-template-compiler-cmake hello_compiler.c
Compiler application template openxla/iree integrations/pjrt/.../iree_compiler.cc
JIT for TensorFlow + JAX to IREE openxla/iree compiler/plugins
In-tree supported compiler plugins openxla/iree samples/compiler_plugins/
In-tree sample compiler plugins nod-ai/iree-amd-aie plugins/.../iree-amd-aie
Early-phase plugins for interfacing with AMD AIE accelerators"},{"location":"reference/bindings/c-api/#runtime-api","title":"Runtime API","text":"The IREE runtime is structured as a modular set of library components. Each component is designed to be linked into applications directly and compiled with LTO style optimizations.
The low level library components can be used directly or through a higher level API.
High level APILow level API The high level 'runtime' API sits on top of the low level components. It is relatively terse but does not expose the full flexibility of the underlying systems.
graph TD\n accTitle: IREE runtime high level API diagram\n accDescr {\n The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n their own types and API methods.\n A high level \"runtime API\" sits on top of these component APIs.\n Applications can interface indirectly with the IREE runtime via this\n high level runtime API.\n }\n\n subgraph iree_runtime[IREE Runtime]\n subgraph base\n base_types(\"Types\n\n \u2022 allocator\n \u2022 status\n \u2022 etc.\")\n end\n\n subgraph hal[HAL]\n hal_types(\"Types\n\n \u2022 buffer\n \u2022 device\n \u2022 etc.\")\n\n hal_drivers(\"Drivers\n\n \u2022 local-*\n \u2022 vulkan\n \u2022 etc.\")\n end\n\n subgraph vm[VM]\n vm_types(\"Types\n\n \u2022 context\n \u2022 invocation\n \u2022 etc.\")\n end\n\n runtime_api(\"Runtime API\n\n \u2022 instance\n \u2022 session\n \u2022 call\")\n\n base_types & hal_types & hal_drivers & vm_types --> runtime_api\n end\n\n application(Your application)\n\n runtime_api --> application
Each runtime component has its own low level API. The low level APIs are typically verbose as they expose the full flexibility of each underlying system.
graph TD\n accTitle: IREE runtime low level API diagram\n accDescr {\n The IREE runtime includes 'base', 'HAL', and 'VM' components, each with\n their own types and API methods.\n Applications can interface directly with the IREE runtime via the low\n level component APIs.\n }\n\n subgraph iree_runtime[IREE Runtime]\n subgraph base\n base_types(\"Types\n\n \u2022 allocator\n \u2022 status\n \u2022 etc.\")\n end\n subgraph hal[HAL]\n hal_types(\"Types\n\n \u2022 buffer\n \u2022 device\n \u2022 etc.\")\n\n hal_drivers(\"Drivers\n\n \u2022 local-*\n \u2022 vulkan\n \u2022 etc.\")\n end\n subgraph vm[VM]\n vm_types(\"Types\n\n \u2022 context\n \u2022 invocation\n \u2022 etc.\")\n end\n end\n\n application(Your application)\n\n base_types & hal_types & hal_drivers & vm_types --> application
Runtime API header files are organized by component:
Component header file Overview iree/runtime/api.h
High level runtime API iree/base/api.h
Core API, type definitions, ownership policies, utilities iree/vm/api.h
VM APIs: loading modules, I/O, calling functions iree/hal/api.h
HAL APIs: device management, synchronization, accessing hardware features"},{"location":"reference/bindings/c-api/#high-level-concepts","title":"High level concepts","text":"The high level API uses instances, sessions, and calls to run programs with a small API surface.
stateDiagram-v2\n accTitle: IREE runtime high level API state diagram\n accDescr {\n Instances track sessions and state: options, drivers, devices.\n Sessions track calls and state: a device and bytecode/VM modules.\n Calls track input and output lists.\n }\n\n state iree_runtime_instance_t {\n instance_state: state<br>- options<br>- drivers<br>- devices\n\n state iree_runtime_session_t {\n session_state: state<br>- device<br>- VM / bytecode modules\n state iree_runtime_call_t {\n inputs\n outputs\n }\n }\n }
"},{"location":"reference/bindings/c-api/#instance","title":"Instance","text":"An instance (iree_runtime_instance_t
) isolates runtime usage and manages device resources.
- Instances may service multiple sessions to avoid extra device interaction and reuse caches/pools.
- Separate instances are isolated/sandboxed from one another.
"},{"location":"reference/bindings/c-api/#session","title":"Session","text":"A session (iree_runtime_session_t
) contains a set of loaded modules and their state.
- Sessions that share an instance may share resources directly.
- Sessions that do not share an instance can transfer resources using import and export APIs.
"},{"location":"reference/bindings/c-api/#call","title":"Call","text":"A call (iree_runtime_call_t
) is a stateful VM function call builder.
- Calls can be reused to avoid having to construct input lists for each invocation.
"},{"location":"reference/bindings/c-api/#low-level-concepts","title":"Low level concepts","text":""},{"location":"reference/bindings/c-api/#base","title":"Base","text":"Under construction, more coming soon
"},{"location":"reference/bindings/c-api/#vm","title":"VM","text":"IREE uses its own Virtual Machine (VM) at runtime to interpret program instructions on the host system.
Tip - EmitC alternate lowering path VM instructions may be further lowered to C source code for static or resource constrained deployment.
See the --output-format=vm-c
compiler option and the samples in samples/emitc_modules/
for more information.
The VM supports generic operations like loads, stores, arithmetic, function calls, and control flow. The VM builds streams of more complex program logic and dense math into HAL command buffers that are dispatched to hardware backends.
- VM instances can serve multiple isolated execution contexts.
- VM contexts are effectively sandboxes for loading modules and running programs.
-
VM modules provide all functionality to execution contexts, including access to hardware accelerators through the HAL. Compiled user programs are also modules.
stateDiagram-v2\n accTitle: Sample VM Modules\n accDescr {\n Bytecode modules contain program state, program functions, and debug\n information.\n HAL modules contain devices, executables, HAL functions, and HAL types.\n Custom modules may contain external functions and custom types.\n }\n\n state \"Bytecode module\" as bytecode {\n bytecode_contents: Module state<br>Program functions<br>Debug information\n }\n\n state \"HAL module\" as HAL {\n hal_contents: Devices<br>Executables<br>HAL functions<br>HAL types\n }\n\n state \"Custom module\" as custom {\n custom_contents: External functions<br>Custom types\n }
"},{"location":"reference/bindings/c-api/#hal","title":"HAL","text":"IREE uses a Hardware Abstraction Layer (HAL) to model and interact with hardware devices like CPUs, GPUs and other accelerators.
- HAL drivers are used to enumerate and create HAL devices.
- HAL devices interface with hardware, such as by allocating device memory, preparing executables, recording and dispatching command buffers, and synchronizing with the host.
- HAL buffers represent data storage and buffer views represent views into that storage with associated shapes and types (similar to \"tensors\").
"},{"location":"reference/bindings/c-api/#usage_1","title":"Usage","text":"For other examples, see the samples below.
hello_world_terse.chello_world_explained.c Source file: runtime/src/iree/runtime/demo/hello_world_terse.c
runtime/src/iree/runtime/demo/hello_world_terse.c#include <stdio.h>\n\n#include \"iree/runtime/api.h\"\n#include \"iree/runtime/testdata/simple_mul_module_c.h\"\n\nstatic void iree_runtime_demo_run_session(iree_runtime_instance_t* instance);\nstatic void iree_runtime_demo_perform_mul(iree_runtime_session_t* session);\n\n//===----------------------------------------------------------------------===//\n// 1. Entry point / shared iree_runtime_instance_t setup\n//===----------------------------------------------------------------------===//\n\nint main(int argc, char** argv) {\n// Create and configure the instance shared across all sessions.\niree_runtime_instance_options_t instance_options;\niree_runtime_instance_options_initialize(&instance_options);\niree_runtime_instance_options_use_all_available_drivers(&instance_options);\niree_runtime_instance_t* instance = NULL;\nIREE_CHECK_OK(iree_runtime_instance_create(\n&instance_options, iree_allocator_system(), &instance));\n\n// All sessions should share the same instance.\niree_runtime_demo_run_session(instance);\n\niree_runtime_instance_release(instance);\nreturn 0;\n}\n\n//===----------------------------------------------------------------------===//\n// 2. 
Load modules and initialize state in iree_runtime_session_t\n//===----------------------------------------------------------------------===//\n\nstatic void iree_runtime_demo_run_session(iree_runtime_instance_t* instance) {\n// TODO(#5724): move device selection into the compiled modules.\niree_hal_device_t* device = NULL;\nIREE_CHECK_OK(iree_runtime_instance_try_create_default_device(\ninstance, iree_make_cstring_view(\"local-task\"), &device));\n\n// Create one session per loaded module to hold the module state.\niree_runtime_session_options_t session_options;\niree_runtime_session_options_initialize(&session_options);\niree_runtime_session_t* session = NULL;\nIREE_CHECK_OK(iree_runtime_session_create_with_device(\ninstance, &session_options, device,\niree_runtime_instance_host_allocator(instance), &session));\niree_hal_device_release(device);\n\n// Load your user module into the session (from memory, from file, etc).\nconst iree_file_toc_t* module_file =\niree_runtime_testdata_simple_mul_module_create();\nIREE_CHECK_OK(iree_runtime_session_append_bytecode_module_from_memory(\nsession, iree_make_const_byte_span(module_file->data, module_file->size),\niree_allocator_null()));\n\n// Run your functions; you should reuse the session to make multiple calls.\niree_runtime_demo_perform_mul(session);\n\niree_runtime_session_release(session);\n}\n\n//===----------------------------------------------------------------------===//\n// 3. 
Call a function within a module with buffer views\n//===----------------------------------------------------------------------===//\n\n// func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) ->\n// tensor<4xf32>\nstatic void iree_runtime_demo_perform_mul(iree_runtime_session_t* session) {\niree_runtime_call_t call;\nIREE_CHECK_OK(iree_runtime_call_initialize_by_name(\nsession, iree_make_cstring_view(\"module.simple_mul\"), &call));\n\n// %arg0: tensor<4xf32>\niree_hal_buffer_view_t* arg0 = NULL;\nstatic const iree_hal_dim_t arg0_shape[1] = {4};\nstatic const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};\nIREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(\niree_runtime_session_device(session),\niree_runtime_session_device_allocator(session),\nIREE_ARRAYSIZE(arg0_shape), arg0_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,\nIREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n(iree_hal_buffer_params_t){\n.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n.access = IREE_HAL_MEMORY_ACCESS_ALL,\n.usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n},\niree_make_const_byte_span(arg0_data, sizeof(arg0_data)), &arg0));\nIREE_CHECK_OK(iree_hal_buffer_view_fprint(\nstdout, arg0, /*max_element_count=*/4096,\niree_runtime_session_host_allocator(session)));\nIREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&call, arg0));\niree_hal_buffer_view_release(arg0);\n\nfprintf(stdout, \"\\n * \\n\");\n\n// %arg1: tensor<4xf32>\niree_hal_buffer_view_t* arg1 = NULL;\nstatic const iree_hal_dim_t arg1_shape[1] = {4};\nstatic const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};\nIREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(\niree_runtime_session_device(session),\niree_runtime_session_device_allocator(session),\nIREE_ARRAYSIZE(arg1_shape), arg1_shape, IREE_HAL_ELEMENT_TYPE_FLOAT_32,\nIREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n(iree_hal_buffer_params_t){\n.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n.access = IREE_HAL_MEMORY_ACCESS_ALL,\n.usage = 
IREE_HAL_BUFFER_USAGE_DEFAULT,\n},\niree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1));\nIREE_CHECK_OK(iree_hal_buffer_view_fprint(\nstdout, arg1, /*max_element_count=*/4096,\niree_runtime_session_host_allocator(session)));\nIREE_CHECK_OK(iree_runtime_call_inputs_push_back_buffer_view(&call, arg1));\niree_hal_buffer_view_release(arg1);\n\nIREE_CHECK_OK(iree_runtime_call_invoke(&call, /*flags=*/0));\n\nfprintf(stdout, \"\\n = \\n\");\n\n// -> tensor<4xf32>\niree_hal_buffer_view_t* ret0 = NULL;\nIREE_CHECK_OK(iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret0));\nIREE_CHECK_OK(iree_hal_buffer_view_fprint(\nstdout, ret0, /*max_element_count=*/4096,\niree_runtime_session_host_allocator(session)));\niree_hal_buffer_view_release(ret0);\n\niree_runtime_call_deinitialize(&call);\n}\n
Source file: runtime/src/iree/runtime/demo/hello_world_explained.c
runtime/src/iree/runtime/demo/hello_world_explained.c#include <stdio.h>\n\n#include \"iree/runtime/api.h\"\n\nstatic int iree_runtime_demo_main(void);\nstatic iree_status_t iree_runtime_demo_run_session(\niree_runtime_instance_t* instance);\nstatic iree_status_t iree_runtime_demo_perform_mul(\niree_runtime_session_t* session);\n\n#if defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_COMMAND_LINE_ARG)\n\nstatic const char* demo_file_path = NULL;\n\n// Takes the first argument on the command line as a file path and loads it.\nint main(int argc, char** argv) {\nif (argc < 2) {\nfprintf(stderr, \"usage: session_demo module_file.vmfb\\n\");\nreturn 1;\n}\ndemo_file_path = argv[1];\nreturn iree_runtime_demo_main();\n}\n\n// Loads a compiled IREE module from the file system.\nstatic iree_status_t iree_runtime_demo_load_module(\niree_runtime_session_t* session) {\nreturn iree_runtime_session_append_bytecode_module_from_file(session,\ndemo_file_path);\n}\n\n#elif defined(IREE_RUNTIME_DEMO_LOAD_FILE_FROM_EMBEDDED_DATA)\n\n#include \"iree/runtime/testdata/simple_mul_module_c.h\"\n\nint main(int argc, char** argv) { return iree_runtime_demo_main(); }\n\n// Loads the bytecode module directly from memory.\n//\n// Embedding the compiled output into your binary is not always possible (or\n// recommended) but is a fairly painless way to get things working on a variety\n// of targets without worrying about how to deploy files or pass flags.\n//\n// In cases like this the module file is in .rodata and does not need to be\n// freed; if the memory needs to be released when the module is unloaded then a\n// custom allocator can be provided to get a callback instead.\nstatic iree_status_t iree_runtime_demo_load_module(\niree_runtime_session_t* session) {\nconst iree_file_toc_t* module_file =\niree_runtime_testdata_simple_mul_module_create();\nreturn iree_runtime_session_append_bytecode_module_from_memory(\nsession, iree_make_const_byte_span(module_file->data, 
module_file->size),\niree_allocator_null());\n}\n\n#else\n#error \"must specify a way to load the module data\"\n#endif // IREE_RUNTIME_DEMO_LOAD_FILE_FROM_*\n\n//===----------------------------------------------------------------------===//\n// 1. Entry point / shared iree_runtime_instance_t setup\n//===----------------------------------------------------------------------===//\n// Applications should create and share a single instance across all sessions.\n\n// This would live in your application startup/shutdown code or scoped to the\n// usage of IREE. Creating and destroying instances is expensive and should be\n// avoided.\nstatic int iree_runtime_demo_main(void) {\n// Set up the shared runtime instance.\n// An application should usually only have one of these and share it across\n// all of the sessions it has. The instance is thread-safe, while the\n// sessions are only thread-compatible (you need to lock if its required).\niree_runtime_instance_options_t instance_options;\niree_runtime_instance_options_initialize(&instance_options);\niree_runtime_instance_options_use_all_available_drivers(&instance_options);\niree_runtime_instance_t* instance = NULL;\niree_status_t status = iree_runtime_instance_create(\n&instance_options, iree_allocator_system(), &instance);\n\n// Run the demo.\n// A real application would load its models (at startup, on-demand, etc) and\n// retain them somewhere to be reused. 
Startup time and likelihood of failure\n// varies across different HAL backends; the synchronous CPU backend is nearly\n// instantaneous and will never fail (unless out of memory) while the Vulkan\n// backend may take significantly longer and fail if there are not supported\n// devices.\nif (iree_status_is_ok(status)) {\nstatus = iree_runtime_demo_run_session(instance);\n}\n\n// Release the shared instance - it will be deallocated when all sessions\n// using it have been released (here it is deallocated immediately).\niree_runtime_instance_release(instance);\n\nint ret = (int)iree_status_code(status);\nif (!iree_status_is_ok(status)) {\n// Dump nice status messages to stderr on failure.\n// An application can route these through its own logging infrastructure as\n// needed. Note that the status is a handle and must be freed!\niree_status_fprint(stderr, status);\niree_status_ignore(status);\n}\nreturn ret;\n}\n\n//===----------------------------------------------------------------------===//\n// 2. Load modules and initialize state in iree_runtime_session_t\n//===----------------------------------------------------------------------===//\n// Each instantiation of a module will live in its own session. 
Module state\n// like variables will be retained across calls within the same session.\n\n// Loads the demo module and uses it to perform some math.\n// In a real application you'd want to hang on to the iree_runtime_session_t\n// and reuse it for future calls - especially if it holds state internally.\nstatic iree_status_t iree_runtime_demo_run_session(\niree_runtime_instance_t* instance) {\n// TODO(#5724): move device selection into the compiled modules.\niree_hal_device_t* device = NULL;\nIREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(\ninstance, iree_make_cstring_view(\"local-task\"), &device));\n\n// Set up the session to run the demo module.\n// Sessions are like OS processes and are used to isolate modules from each\n// other and hold runtime state such as the variables used within the module.\n// The same module loaded into two sessions will see their own private state.\niree_runtime_session_options_t session_options;\niree_runtime_session_options_initialize(&session_options);\niree_runtime_session_t* session = NULL;\niree_status_t status = iree_runtime_session_create_with_device(\ninstance, &session_options, device,\niree_runtime_instance_host_allocator(instance), &session);\niree_hal_device_release(device);\n\n// Load the compiled user module in a demo-specific way.\n// Applications could specify files, embed the outputs directly in their\n// binaries, fetch them over the network, etc.\nif (iree_status_is_ok(status)) {\nstatus = iree_runtime_demo_load_module(session);\n}\n\n// Build and issue the call.\nif (iree_status_is_ok(status)) {\nstatus = iree_runtime_demo_perform_mul(session);\n}\n\n// Release the session and free all resources.\niree_runtime_session_release(session);\nreturn status;\n}\n\n//===----------------------------------------------------------------------===//\n// 3. 
Call a function within a module with buffer views\n//===----------------------------------------------------------------------===//\n// The inputs and outputs of a call are reusable across calls (and possibly\n// across sessions depending on device compatibility) and can be setup by the\n// application as needed. For example, an application could perform\n// multi-threaded buffer view creation and then issue the call from a single\n// thread when all inputs are ready. This simple demo just allocates them\n// per-call and throws them away.\n\n// Sets up and calls the simple_mul function and dumps the results:\n// func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) ->\n// tensor<4xf32>\n//\n// NOTE: this is a demo and as such this performs no memoization; a real\n// application could reuse a lot of these structures and cache lookups of\n// iree_vm_function_t to reduce the amount of per-call overhead.\nstatic iree_status_t iree_runtime_demo_perform_mul(\niree_runtime_session_t* session) {\n// Initialize the call to the function.\niree_runtime_call_t call;\nIREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(\nsession, iree_make_cstring_view(\"module.simple_mul\"), &call));\n\n// Append the function inputs with the HAL device allocator in use by the\n// session. 
The buffers will be usable within the session and _may_ be usable\n// in other sessions depending on whether they share a compatible device.\niree_hal_device_t* device = iree_runtime_session_device(session);\niree_hal_allocator_t* device_allocator =\niree_runtime_session_device_allocator(session);\niree_allocator_t host_allocator =\niree_runtime_session_host_allocator(session);\niree_status_t status = iree_ok_status();\n{\n// %arg0: tensor<4xf32>\niree_hal_buffer_view_t* arg0 = NULL;\nif (iree_status_is_ok(status)) {\nstatic const iree_hal_dim_t arg0_shape[1] = {4};\nstatic const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};\nstatus = iree_hal_buffer_view_allocate_buffer_copy(\ndevice, device_allocator,\n// Shape rank and dimensions:\nIREE_ARRAYSIZE(arg0_shape), arg0_shape,\n// Element type:\nIREE_HAL_ELEMENT_TYPE_FLOAT_32,\n// Encoding type:\nIREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n(iree_hal_buffer_params_t){\n// Where to allocate (host or device):\n.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n// Access to allow to this memory:\n.access = IREE_HAL_MEMORY_ACCESS_ALL,\n// Intended usage of the buffer (transfers, dispatches, etc):\n.usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n},\n// The actual heap buffer to wrap or clone and its allocator:\niree_make_const_byte_span(arg0_data, sizeof(arg0_data)),\n// Buffer view + storage are returned and owned by the caller:\n&arg0);\n}\nif (iree_status_is_ok(status)) {\nIREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(\nstdout, arg0, /*max_element_count=*/4096, host_allocator));\n// Add to the call inputs list (which retains the buffer view).\nstatus = iree_runtime_call_inputs_push_back_buffer_view(&call, arg0);\n}\n// Since the call retains the buffer view we can release it here.\niree_hal_buffer_view_release(arg0);\n\nfprintf(stdout, \"\\n * \\n\");\n\n// %arg1: tensor<4xf32>\niree_hal_buffer_view_t* arg1 = NULL;\nif (iree_status_is_ok(status)) {\nstatic const iree_hal_dim_t arg1_shape[1] = {4};\nstatic const float arg1_data[4] = 
{10.0f, 100.0f, 1000.0f, 10000.0f};\nstatus = iree_hal_buffer_view_allocate_buffer_copy(\ndevice, device_allocator, IREE_ARRAYSIZE(arg1_shape), arg1_shape,\nIREE_HAL_ELEMENT_TYPE_FLOAT_32,\nIREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,\n(iree_hal_buffer_params_t){\n.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,\n.access = IREE_HAL_MEMORY_ACCESS_ALL,\n.usage = IREE_HAL_BUFFER_USAGE_DEFAULT,\n},\niree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1);\n}\nif (iree_status_is_ok(status)) {\nIREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(\nstdout, arg1, /*max_element_count=*/4096, host_allocator));\nstatus = iree_runtime_call_inputs_push_back_buffer_view(&call, arg1);\n}\niree_hal_buffer_view_release(arg1);\n}\n\n// Synchronously perform the call.\nif (iree_status_is_ok(status)) {\nstatus = iree_runtime_call_invoke(&call, /*flags=*/0);\n}\n\nfprintf(stdout, \"\\n = \\n\");\n\n// Dump the function outputs.\niree_hal_buffer_view_t* ret0 = NULL;\nif (iree_status_is_ok(status)) {\n// Try to get the first call result as a buffer view.\nstatus = iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret0);\n}\nif (iree_status_is_ok(status)) {\n// This prints the buffer view out but an application could read its\n// contents, pass it to another call, etc.\nstatus = iree_hal_buffer_view_fprint(\nstdout, ret0, /*max_element_count=*/4096, host_allocator);\n}\niree_hal_buffer_view_release(ret0);\n\niree_runtime_call_deinitialize(&call);\nreturn status;\n}\n
"},{"location":"reference/bindings/c-api/#samples_1","title":"Samples","text":"Project Source Description iree-org/iree-template-runtime-cmake hello_world.c
Runtime application template openxla/iree runtime/demo/
In-tree demos of the high level runtime API openxla/iree samples/
In-tree sample applications iree-org/iree-samples runtime-library/
Shared runtime library builder. Builds libireert.so
to aid development iml130/iree-template-cpp simple_embedding.c
Demo integration into a project"},{"location":"reference/bindings/c-api/#compiler-runtime-jit","title":"Compiler + Runtime = JIT","text":"The compiler and runtime APIs may be used together to build a \"just in time\" (JIT) execution engine. JIT compilation allows for last-minute specialization with no prior knowledge of target devices and avoids issues with version drift, but it can also constrain deployment options and usage scenarios.
"},{"location":"reference/bindings/python/","title":"Python bindings","text":"","tags":["Python"]},{"location":"reference/bindings/python/#overview","title":"Overview","text":"IREE offers Python bindings split into several packages, covering different components:
PIP package name Description iree-compiler
IREE's generic compiler tools and helpers iree-runtime
IREE's runtime, including CPU and GPU backends iree-tools-tf
Tools for importing from TensorFlow iree-tools-tflite
Tools for importing from TensorFlow Lite iree-jax
Tools for importing from JAX Collectively, these packages allow for importing from frontends, compiling towards various targets, and executing compiled code on IREE's backends.
","tags":["Python"]},{"location":"reference/bindings/python/#prerequisites","title":"Prerequisites","text":"To use IREE's Python bindings, you will first need to install Python 3 and pip, as needed.
Tip - Virtual environments We recommend using virtual environments to manage Python packages, such as through venv
(about, tutorial):
Linux macOS Windows python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\nsource .venv/bin/activate\n
python -m venv .venv\n.venv\\Scripts\\activate.bat\n
When done, run deactivate
.
","tags":["Python"]},{"location":"reference/bindings/python/#installing-iree-packages","title":"Installing IREE packages","text":"","tags":["Python"]},{"location":"reference/bindings/python/#prebuilt-packages","title":"Prebuilt packages","text":"Stable releases Nightly releases Stable release packages are published to PyPI.
python -m pip install \\\niree-compiler \\\niree-runtime\n
Nightly releases are published on GitHub releases.
python -m pip install \\\n--find-links https://iree.dev/pip-release-links.html \\\n--upgrade \\\niree-compiler \\\niree-runtime\n
","tags":["Python"]},{"location":"reference/bindings/python/#building-from-source","title":"Building from source","text":"See Building Python bindings page for instructions for building from source.
","tags":["Python"]},{"location":"reference/bindings/python/#usage","title":"Usage","text":"Info - API reference pages
API reference pages for IREE's runtime and compiler Python APIs are hosted on readthedocs.
Documentation for the MLIR compiler Python APIs can be found at https://mlir.llvm.org/docs/Bindings/Python/.
","tags":["Python"]},{"location":"reference/bindings/python/#compile-a-program","title":"Compile a program","text":"from iree import compiler as ireec\n\n# Compile a module.\nINPUT_MLIR = \"\"\"\nmodule @arithmetic {\n func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {\n %0 = arith.mulf %arg0, %arg1 : tensor<4xf32>\n return %0 : tensor<4xf32>\n }\n}\n\"\"\"\n\n# Compile using the vmvx (reference) target:\ncompiled_flatbuffer = ireec.tools.compile_str(\n INPUT_MLIR,\n target_backends=[\"vmvx\"])\n
","tags":["Python"]},{"location":"reference/bindings/python/#run-a-compiled-program","title":"Run a compiled program","text":"from iree import runtime as ireert\nimport numpy as np\n\n# Register the module with a runtime context.\n# Use the \"local-task\" CPU driver, which can load the vmvx executable:\nconfig = ireert.Config(\"local-task\")\nctx = ireert.SystemContext(config=config)\nvm_module = ireert.VmModule.copy_buffer(ctx.instance, compiled_flatbuffer)\nctx.add_vm_module(vm_module)\n\n# Invoke the function and print the result.\nprint(\"INVOKE simple_mul\")\narg0 = np.array([1., 2., 3., 4.], dtype=np.float32)\narg1 = np.array([4., 5., 6., 7.], dtype=np.float32)\nf = ctx.modules.arithmetic[\"simple_mul\"]\nresults = f(arg0, arg1).to_host()\nprint(\"Results:\", results)\n
","tags":["Python"]},{"location":"reference/bindings/python/#samples","title":"Samples","text":"Check out the samples in IREE's samples/colab/ directory and the iree-samples repository for examples using the Python APIs.
","tags":["Python"]},{"location":"reference/bindings/python/#console-scripts","title":"Console scripts","text":"The Python packages include console scripts for most of IREE's native tools like iree-compile
and iree-run-module
. After installing a package from pip, these should be added to your path automatically:
$ python -m pip install iree-runtime\n$ which iree-run-module\n\n/projects/.venv/Scripts/iree-run-module\n
","tags":["Python"]},{"location":"reference/bindings/python/#profiling","title":"Profiling","text":"The tools in the iree-runtime
package support variants:
Variant name Description default Standard runtime tools tracy Runtime tools instrumented using the Tracy profiler Switch between variants of the installed tools using the IREE_PY_RUNTIME
environment variable:
IREE_PY_RUNTIME=tracy iree-run-module ...\n
See the developer documentation page on Profiling with Tracy for information on using Tracy.
","tags":["Python"]},{"location":"reference/mlir-dialects/","title":"MLIR dialects","text":"These pages contain automatically generated documentation for the MLIR dialects defined in the IREE repository. IREE also makes extensive use of dialects from the upstream MLIR repository, which are documented at https://mlir.llvm.org/docs/Dialects/.
"},{"location":"reference/mlir-dialects/#iree-internal-dialects","title":"IREE internal dialects","text":"These dialects are an implementation detail of the IREE compiler, though they can be used by plugins and other advanced integrations. The sources for most of these dialects can be found in the iree/compiler/Dialect/
directory.
Dialect Description Check Defines assertions for IREE tests Flow Models execution data flow and partitioning HAL Represents operations against the IREE HAL1 HAL/Inline Inline HAL interop runtime module dialect HAL/Loader HAL inline executable loader runtime module dialect IO/Parameters External parameter resource management APIs Stream Model execution partitioning and scheduling Util Types and ops common across IREE subdialects VM Represents operations against an abstract virtual machine VMVX Virtual Machine Vector Extensions"},{"location":"reference/mlir-dialects/#iree-public-dialects","title":"IREE public dialects","text":"The ops in these dialects are legal to include in compiler inputs. The sources for these dialects can be found in the llvm-external-projects/iree-dialects/
directory that is designed to be used from other projects via LLVM's external projects mechanism.
Dialect Description IREEInput Structural ops legal as input to IREE's compiler IREELinalgExt Extensions to the Linalg dialect for specific operations IREEVectorExt Extensions to the Vector dialect for specific operations -
Hardware Abstraction Layer\u00a0\u21a9
"},{"location":"reference/mlir-dialects/Check/","title":"Check","text":""},{"location":"reference/mlir-dialects/Check/#check-dialect","title":"'check' Dialect","text":"A dialect implementing test assertions for IREE modules.
- 'check' Dialect
- Operation definition
- check.expect_all_true (Check::ExpectAllTrueOp)
- check.expect_almost_eq (Check::ExpectAlmostEqOp)
- check.expect_almost_eq_const (Check::ExpectAlmostEqConstOp)
- check.expect_eq (Check::ExpectEqOp)
- check.expect_eq_const (Check::ExpectEqConstOp)
- check.expect_false (Check::ExpectFalseOp)
- check.expect_true (Check::ExpectTrueOp)
"},{"location":"reference/mlir-dialects/Check/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Check/#checkexpect_all_true-checkexpectalltrueop","title":"check.expect_all_true
(Check::ExpectAllTrueOp)","text":"Checks that the operand contains only values that are true
Syntax:
operation ::= `check.expect_all_true` (`` `<` $device^ `>`)?\n `` `(` $operand `)` attr-dict `:` type($operand)\n
Verifies that the operand contains true values, which are represented by any non-zero integer.
Issues a non-fatal failure if the verification fails.
check.expect_all_true<%device>(%arg0) : !hal.buffer_view\ncheck.expect_all_true(%arg1) : tensor<2x2xi32>\n
"},{"location":"reference/mlir-dialects/Check/#operands","title":"Operands:","text":"Operand Description device
device operand
buffer_view or tensor of signless integer values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq-checkexpectalmosteqop","title":"check.expect_almost_eq
(Check::ExpectAlmostEqOp)","text":"Checks that the operands are almost equal
Syntax:
operation ::= `check.expect_almost_eq` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n
Verifies that the buffer view or tensor operands with float elements are almost equal to within an implementation-defined \"reasonable\" tolerance.
Issues a non-fatal failure if the verification fails.
check.expect_almost_eq(%arg0, %arg1) : tensor<5xf32>\n
"},{"location":"reference/mlir-dialects/Check/#operands_1","title":"Operands:","text":"Operand Description device
device lhs
buffer_view or tensor of floating-point values rhs
buffer_view or tensor of floating-point values"},{"location":"reference/mlir-dialects/Check/#checkexpect_almost_eq_const-checkexpectalmosteqconstop","title":"check.expect_almost_eq_const
(Check::ExpectAlmostEqConstOp)","text":"Checks that the tensor operand is almost equal to some constant
Syntax:
operation ::= `check.expect_almost_eq_const` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n
Verifies that the tensor operand with float elements is almost equal to the constant attribute within an implementation-defined \"reasonable\" tolerance.
Issues a non-fatal failure if the verification fails.
This op is just a convenience wrapper around the expect_almost_eq op.
check.expect_almost_eq_const(%const0, dense<[0.999999, 2.0]> : tensor<5xf32>) : tensor<5xf32>\n
"},{"location":"reference/mlir-dialects/Check/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_2","title":"Operands:","text":"Operand Description device
device lhs
tensor of floating-point values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq-checkexpecteqop","title":"check.expect_eq
(Check::ExpectEqOp)","text":"Checks that the tensor or buffer view operands are equal
Syntax:
operation ::= `check.expect_eq` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $rhs `)` attr-dict `:` type($lhs)\n
Verifies that the operands are exactly equal.
Issues a non-fatal failure if the verification fails.
check.expect_eq(%arg0, %arg1) : tensor<5xi32>\n
"},{"location":"reference/mlir-dialects/Check/#operands_3","title":"Operands:","text":"Operand Description device
device lhs
buffer_view or tensor of any type values rhs
buffer_view or tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_eq_const-checkexpecteqconstop","title":"check.expect_eq_const
(Check::ExpectEqConstOp)","text":"Checks that the tensor operand is equal to some constant
Syntax:
operation ::= `check.expect_eq_const` (`` `<` $device^ `>`)?\n `` `(` $lhs `,` $value `)` attr-dict `:` type($lhs)\n
Verifies that the tensor operand is exactly equal to a constant attribute.
Issues a non-fatal failure if the verification fails.
This op is just a convenience wrapper around the expect_eq op.
check.expect_eq_const(%arg0, dense<[1, 2]> : tensor<2xi32>) : tensor<2xi32>\n
"},{"location":"reference/mlir-dialects/Check/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Check/#operands_4","title":"Operands:","text":"Operand Description device
device lhs
tensor of any type values"},{"location":"reference/mlir-dialects/Check/#checkexpect_false-checkexpectfalseop","title":"check.expect_false
(Check::ExpectFalseOp)","text":"Checks that the operand is false
Syntax:
operation ::= `check.expect_false` `(` $operand `)` attr-dict `:` type($operand)\n
Verifies that the operand contains a false value, which is represented by zero.
Issues a non-fatal failure if the verification fails.
check.expect_false(%arg0) : i32\n
"},{"location":"reference/mlir-dialects/Check/#operands_5","title":"Operands:","text":"Operand Description operand
signless integer"},{"location":"reference/mlir-dialects/Check/#checkexpect_true-checkexpecttrueop","title":"check.expect_true
(Check::ExpectTrueOp)","text":"Checks that the operand is true
Syntax:
operation ::= `check.expect_true` `(` $operand `)` attr-dict `:` type($operand)\n
Verifies that the operand contains a true value, which is represented by any non-zero integer.
Issues a non-fatal failure if the verification fails.
check.expect_true(%arg0) : i32\n
"},{"location":"reference/mlir-dialects/Check/#operands_6","title":"Operands:","text":"Operand Description operand
signless integer"},{"location":"reference/mlir-dialects/Flow/","title":"Flow","text":""},{"location":"reference/mlir-dialects/Flow/#flow-dialect","title":"'flow' Dialect","text":"A dialect designed to model execution data flow and partitioning.
The flow dialect is used to model regions of dense computation and the data flow between them. MLIR value-semantic tensors are used as the primary data type to allow SSA use-def to provide a bulk of the infrastructure required to perform the computation partitioning and outlining.
The dialect is designed to ingest relatively high-level linear algebra via XLA HLO ops (that also operate on the value-semantic tensor types) and optionally MLIR standard ops for control flow and other actions. After conversion of any higher-level ops that have special semantics in the flow dialect, such as global variables, the rest are partitioned into regions containing simple and compatible computations. Finally, outlining moves the computations into executables and leaves only the execution flow encoded via dispatch operations.
The primary unit of interest is a \"dispatch region\" containing compatible computations that can be scheduled together efficiently (and safely). \"Compatible\" here is specified as similarly shaped workloads that indicate how many invocations a computation can be parallelized across when running in a SPMD execution model. Though it depends on the particular runtime backends this more concretely means things like the untiled workload (or tiled workgroups) used in GPU dispatches or similar thread pool executors.
After identification of the dispatchable regions a set of transformations performs folding and simplification to reduce the total number of dispatches. Heuristics are used in certain cases to more efficiently schedule special ops (such as GEMM) and the design is amenable to profile-guided analysis that can be added in the future.
The resulting outlined executable modules containing the dispatchable code can be translated to one or more backends (such as SPIR-V for Vulkan, or LLVM IR for running on the CPU, etc). The IR that is outlined is untouched and in the input format (such as XLA HLO ops) allowing conversion using any MLIR target that supports ingesting such input. A few special ops are used to communicate statically available information such as the expected workload size, shapes of inputs and outputs, etc.
- 'flow' Dialect
- Operation definition
- Collective communication ops
- flow.channel.count (Flow::ChannelCountOp)
- flow.channel.default (Flow::ChannelDefaultOp)
- flow.channel.rank (Flow::ChannelRankOp)
- flow.channel.split (Flow::ChannelSplitOp)
- flow.collective.all_gather (Flow::CollectiveAllGatherOp)
- flow.collective.all_reduce (Flow::CollectiveAllReduceOp)
- flow.collective.all_to_all (Flow::CollectiveAllToAllOp)
- flow.collective.reduce_scatter (Flow::CollectiveReduceScatterOp)
- flow.collective.send_recv (Flow::CollectiveSendRecvOp)
- Dispatch ops
- flow.dispatch (Flow::DispatchOp)
- Executable ops
- flow.executable_end (Flow::ExecutableEndOp)
- flow.executable.export (Flow::ExecutableExportOp)
- flow.executable (Flow::ExecutableOp)
- Partitioned region ops
- flow.dispatch.region (Flow::DispatchRegionOp)
- flow.dispatch.tensor.load (Flow::DispatchTensorLoadOp)
- flow.dispatch.tensor.store (Flow::DispatchTensorStoreOp)
- flow.dispatch.tie_shape (Flow::DispatchTieShapeOp)
- flow.dispatch.workgroup.count (Flow::DispatchWorkgroupCountOp)
- flow.dispatch.workgroup.id (Flow::DispatchWorkgroupIDOp)
- flow.dispatch.workgroup.size (Flow::DispatchWorkgroupSizeOp)
- flow.dispatch.workgroups (Flow::DispatchWorkgroupsOp)
- flow.return (Flow::ReturnOp)
- Streamable call ops
- flow.call (Flow::CallOp)
- flow.func (Flow::FuncOp)
- Tensor ops
- flow.dispatch.workgroup_count_from_dag_root (Flow::DispatchWorkgroupCountFromDagRootOp)
- flow.dispatch.workgroup_count_from_slice (Flow::DispatchWorkgroupCountFromSliceOp)
- flow.dispatch.workload.ordinal (Flow::DispatchWorkloadOrdinalOp)
- flow.tensor.alloca (Flow::TensorAllocaOp)
- flow.tensor.bitcast (Flow::TensorBitCastOp)
- flow.tensor.clone (Flow::TensorCloneOp)
- flow.tensor.constant (Flow::TensorConstantOp)
- flow.tensor.empty (Flow::TensorEmptyOp)
- flow.tensor.load (Flow::TensorLoadOp)
- flow.tensor.reshape (Flow::TensorReshapeOp)
- flow.tensor.slice (Flow::TensorSliceOp)
- flow.tensor.splat (Flow::TensorSplatOp)
- flow.tensor.store (Flow::TensorStoreOp)
- flow.tensor.tie_shape (Flow::TensorTieShapeOp)
- flow.tensor.trace (Flow::TensorTraceOp)
- flow.tensor.update (Flow::TensorUpdateOp)
- Attribute definition
- DummyAttr
- Type constraint definition
- dispatch.tensor
- dispatch.tensor
- dispatch.tensor
- Type definition
- ChannelType
- DummyType
"},{"location":"reference/mlir-dialects/Flow/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Flow/#collective-communication-ops","title":"Collective communication ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowchannelcount-flowchannelcountop","title":"flow.channel.count
(Flow::ChannelCountOp)","text":"Returns the total number of participants in the group
Syntax:
operation ::= `flow.channel.count` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the total participant count in the collective communicator group.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowchanneldefault-flowchanneldefaultop","title":"flow.channel.default
(Flow::ChannelDefaultOp)","text":"Returns a default collective communication channel
Syntax:
operation ::= `flow.channel.default` ($group^)?\n `:` type($result)\n attr-dict-with-keyword\n
Returns a channel initialized using the runtime environment.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription group
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#results_1","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#flowchannelrank-flowchannelrankop","title":"flow.channel.rank
(Flow::ChannelRankOp)","text":"Returns the rank of the local participant in the group
Syntax:
operation ::= `flow.channel.rank` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the rank the channel represents as a participant in a collective group in [0, count)
.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_1","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowchannelsplit-flowchannelsplitop","title":"flow.channel.split
(Flow::ChannelSplitOp)","text":"Splits a collective communication channel
Syntax:
operation ::= `flow.channel.split` $channel `,` $color `,` $key\n `:` type($channel) `->` type($result)\n attr-dict-with-keyword\n
Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group.
Interfaces: InferTypeOpInterface, OpAsmOpInterface
"},{"location":"reference/mlir-dialects/Flow/#operands_2","title":"Operands:","text":"Operand Description channel
a collective communication channel color
index key
index"},{"location":"reference/mlir-dialects/Flow/#results_3","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_gather-flowcollectiveallgatherop","title":"flow.collective.all_gather
(Flow::CollectiveAllGatherOp)","text":"Performs all-gather operation
Syntax:
operation ::= `flow.collective.all_gather` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
It gathers data from all ranks and concatenates them on the 0-th dimension. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_3","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_4","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_reduce-flowcollectiveallreduceop","title":"flow.collective.all_reduce
(Flow::CollectiveAllReduceOp)","text":"Performs all-reduce operation
Syntax:
operation ::= `flow.collective.all_reduce` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
The operation reduces data across all the ranks in the channel. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription reduction_op
mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_4","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_5","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectiveall_to_all-flowcollectivealltoallop","title":"flow.collective.all_to_all
(Flow::CollectiveAllToAllOp)","text":"Performs all-to-all operation
Syntax:
operation ::= `flow.collective.all_to_all` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
This operation mutually exchanges data across all of the ranks in the channel. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_5","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_6","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivereduce_scatter-flowcollectivereducescatterop","title":"flow.collective.reduce_scatter
(Flow::CollectiveReduceScatterOp)","text":"Performs reduce and scatter operations
Syntax:
operation ::= `flow.collective.reduce_scatter` $reduction_op `,` $element_type `,` $target `,` $source `,` $channel `:`\n `(` type($target) `,` type($source) `,` type($channel) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
The operation reduces data across all the ranks in the channel and scatters the result to each rank. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription reduction_op
mlir::iree_compiler::IREE::Flow::CollectiveReductionOpAttrvalid CollectiveReductionOp element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_6","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel"},{"location":"reference/mlir-dialects/Flow/#results_7","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowcollectivesend_recv-flowcollectivesendrecvop","title":"flow.collective.send_recv
(Flow::CollectiveSendRecvOp)","text":"Performs a grouped send and receive operation
Syntax:
operation ::= `flow.collective.send_recv` $element_type `,` $target `,` $source `,` $channel `,` $send `,` $recv `:`\n `(` type($target) `,` type($source) `,` type($channel) `,` type($send) `,` type($recv) `)` `->`\n custom<ShapedTiedResult>(type($result), $target_dims, $tied_operands)\n attr-dict-with-keyword\n
The operation sends data to the rank specified by send and receives data from the rank specified by recv. If send is -1, this rank will not send any data. If recv is -1, this rank will not receive any data and the output will be all zeros. Interfaces: InferTypeOpInterface, TiedOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::iree_compiler::IREE::Flow::CollectiveElementTypeAttrvalid CollectiveElementType tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_7","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index source
ranked tensor of any type values channel
a collective communication channel send
index recv
index"},{"location":"reference/mlir-dialects/Flow/#results_8","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatch-flowdispatchop","title":"flow.dispatch
(Flow::DispatchOp)","text":"A dispatch of workgroups across a grid
Syntax:
operation ::= `flow.dispatch` custom<DispatchEntryPoints>($entry_points)\n (`[` $workload^ `]`)? ``\n `(` $arguments `)` attr-dict `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n
Dispatches workgroups across a grid defined by the captured workload parameters carrying the information required to compute the workgroup count at runtime. The function for converting the workload into a 3D workgroup count is attached to the dispatch entry point and may contain arbitrary host logic.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), SymbolUserOpInterface, TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_points
::mlir::ArrayAttrsymbol ref array attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_8","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_9","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#executable-ops","title":"Executable ops","text":"Executables for outlined regions.
"},{"location":"reference/mlir-dialects/Flow/#flowexecutable_end-flowexecutableendop","title":"flow.executable_end
(Flow::ExecutableEndOp)","text":"Terminator pseudo-op for the executable op
Syntax:
operation ::= `flow.executable_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/Flow/#flowexecutableexport-flowexecutableexportop","title":"flow.executable.export
(Flow::ExecutableExportOp)","text":"
Defines an executable entry point for dispatch operations
Syntax:
operation ::= `flow.executable.export` custom<SymbolVisibility>($sym_visibility)\n custom<SymbolAlias>($sym_name, $function_ref)\n custom<WorkgroupCountRegion>($workgroup_count)\n attr-dict-with-keyword\n
Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal function.
Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Flow/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute function_ref
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Flow/#flowexecutable-flowexecutableop","title":"flow.executable
(Flow::ExecutableOp)","text":"Generic executable module
Syntax:
operation ::= `flow.executable` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
An executable module containing one or more public functions. The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Flow/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#partitioned-region-ops","title":"Partitioned region ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchregion-flowdispatchregionop","title":"flow.dispatch.region
(Flow::DispatchRegionOp)","text":"A group of ops
This op is a container/grouping of ops. It represents a fusion group before being lowered to a dispatch region. Ops are collected inside of the region body of the op. Values from parent regions can be captured. Results are yielded with a return
terminator and returned from this op.
dispatch.region
ops are lowered to dispatch.workgroups
ops. Workgroups isolated from above. dispatch.region
ops are a more lightweight abstraction for implementing fusion heuristics, i.e., the process of deciding which ops should form a dispatch region.
This op also has a second region: workload_count
. The arguments to the region represent the workload for the dispatch, and returns the number of workgroups for the dispatch. The region is lowered directly to workload_count
region of dispatch.workgroups
.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_9","title":"Operands:","text":"Operand Description result_dims
variadic of index workload
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_10","title":"Results:","text":"Result Description result
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorload-flowdispatchtensorloadop","title":"flow.dispatch.tensor.load
(Flow::DispatchTensorLoadOp)","text":"Loads a tensor from a dispatch input placeholder
Syntax:
operation ::= `flow.dispatch.tensor.load` $source\n `,` `offsets` `=` custom<DynamicIndexList>(\n $offsets, $static_offsets)\n `,` `sizes` `=` custom<DynamicIndexList>(\n $sizes, $static_sizes)\n `,` `strides` `=` custom<DynamicIndexList>(\n $strides, $static_strides)\n attr-dict `:` type($source) (`{` $source_dims^ `}`)? `->` type($result)\n
Loads an input tensor or subtensor from an input placeholder. As each workgroup executes concurrently all workgroups will receive identical loaded results of regions that may overlap.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OffsetSizeAndStrideOpInterface, ReifyRankedShapedTypeOpInterface, TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription static_offsets
::mlir::DenseI64ArrayAttri64 dense array attribute static_sizes
::mlir::DenseI64ArrayAttri64 dense array attribute static_strides
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_10","title":"Operands:","text":"Operand Description source
dispatch.tensor source_dims
variadic of index offsets
variadic of index sizes
variadic of index strides
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_11","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtensorstore-flowdispatchtensorstoreop","title":"flow.dispatch.tensor.store
(Flow::DispatchTensorStoreOp)","text":"Stores a tensor into a dispatch output placeholder
Syntax:
operation ::= `flow.dispatch.tensor.store` $value `,` $target\n `,` `offsets` `=` custom<DynamicIndexList>(\n $offsets, $static_offsets)\n `,` `sizes` `=` custom<DynamicIndexList>(\n $sizes, $static_sizes)\n `,` `strides` `=` custom<DynamicIndexList>(\n $strides, $static_strides)\n attr-dict `:` type($value) `->` type($target) (`{` $target_dims^ `}`)?\n
Stores a tensor or subtensor into an output tensor placeholder. As each workgroup executes concurrently behavior is undefined if more than one workgroup stores into overlapping regions of the full output tensor.
Traits: AttrSizedOperandSegments
Interfaces: OffsetSizeAndStrideOpInterface, Util_ShapeAwareOp
"},{"location":"reference/mlir-dialects/Flow/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription static_offsets
::mlir::DenseI64ArrayAttri64 dense array attribute static_sizes
::mlir::DenseI64ArrayAttri64 dense array attribute static_strides
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_11","title":"Operands:","text":"Operand Description value
ranked tensor of any type values target
dispatch.tensor target_dims
variadic of index offsets
variadic of index sizes
variadic of index strides
variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchtie_shape-flowdispatchtieshapeop","title":"flow.dispatch.tie_shape
(Flow::DispatchTieShapeOp)","text":"Ties a runtime shape to a dispatch I/O argument
Syntax:
operation ::= `flow.dispatch.tie_shape` $operand attr-dict\n `:` type($result) (`{` $dynamic_dims^ `}`)?\n
Metadata op used to tie a runtime-computed shape with dynamic dimensions to a dispatch input/output argument. All uses of the argument should use the pass-through result of this op to allow for SSA-based shape resolution.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_12","title":"Operands:","text":"Operand Description operand
dispatch.tensor dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_12","title":"Results:","text":"Result Description result
dispatch.tensor"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupcount-flowdispatchworkgroupcountop","title":"flow.dispatch.workgroup.count
(Flow::DispatchWorkgroupCountOp)","text":"Returns the total workgroup count of the grid
Syntax:
operation ::= `flow.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n
The total number of workgroups along each dimension in the dispatch grid.
Represented as a 3D grid classically written as XYZ. Corresponds to the NumWorkgroups
SPIR-V built-in and the gridDim
CUDA built-in variable.
%x = flow.dispatch.workgroup.count[0] : index\n%y = flow.dispatch.workgroup.count[1] : index\n%z = flow.dispatch.workgroup.count[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_13","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupid-flowdispatchworkgroupidop","title":"flow.dispatch.workgroup.id
(Flow::DispatchWorkgroupIDOp)","text":"Returns the index of the current workgroup in the grid
Syntax:
operation ::= `flow.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n
The global workgroup ID of the current workgroup in the range of [0, flow.dispatch.workgroup.count)
along each dimension.
Represented as a 3D grid classically written as XYZ. Corresponds to the WorkgroupId
SPIR-V built-in and the blockIdx
CUDA built-in variable.
%x = flow.dispatch.workgroup.id[0] : index\n%y = flow.dispatch.workgroup.id[1] : index\n%z = flow.dispatch.workgroup.id[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_14","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroupsize-flowdispatchworkgroupsizeop","title":"flow.dispatch.workgroup.size
(Flow::DispatchWorkgroupSizeOp)","text":"Returns the size of each workgroup in invocations
Syntax:
operation ::= `flow.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n
The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.
Workgroup sizes are not determined at the flow dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.
Represented as a 3D grid classically written as XYZ. Corresponds to the WorkgroupSize
SPIR-V built-in and the blockDim
CUDA built-in variable.
%x = flow.dispatch.workgroup.size[0] : index\n%y = flow.dispatch.workgroup.size[1] : index\n%z = flow.dispatch.workgroup.size[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#results_15","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroups-flowdispatchworkgroupsop","title":"flow.dispatch.workgroups
(Flow::DispatchWorkgroupsOp)","text":"A dispatch of workgroups across a 3-dimensional grid
Syntax:
operation ::= `flow.dispatch.workgroups` (`[` $workload^ `]`)? ``\n `(` $arguments `)` `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n attr-dict-with-keyword\n `=` `\\n` ` ` ` ` ` `\n custom<DispatchWorkgroupBody>(ref(type($arguments)),\n ref(type($results)),\n $workgroup_body)\n `` custom<DispatchWorkgroupsCountRegion>($workgroup_count)\n
Dispatches some number of workgroups across a 3-dimensional grid. The body region will be invoked for each workgroup with a unique flow.dispatch.workgroup.id
in the range of [0, flow.dispatch.workgroup.count)
(along each dimension XYZ).
From the outside the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors.
Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the flow.dispatch.workgroup.*
ops to query the workgroup ID/count/size instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc).
Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it.
%r = flow.dispatch.workgroups[%c5, %c5](%0, %1)\n : (tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32> =\n (%arg0: !flow.dispatch.tensor<readonly:tensor<5x5xf32>>,\n %arg1: !flow.dispatch.tensor<readonly:tensor<5xf32>>,\n %arg2: !flow.dispatch.tensor<writeonly:tensor<5x5xf32>>) {\n ...\n}\n
The number of results of the operation is equal to the number of results in the type signature ((tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32>
). Each tensor argument and result in the type signature has a corresponding block argument of type !flow.dispatch.tensor
. Furthermore, each argument has a corresponding arguments
operand.
There are no arguments
operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, -> %0
would tie the first argument to the result. In that case, there would be no separate block argument for the result.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, IsolatedFromAbove
Interfaces: ClosureOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_13","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_16","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowreturn-flowreturnop","title":"flow.return
(Flow::ReturnOp)","text":"Return from a flow.dispatch_region
Syntax:
operation ::= `flow.return` attr-dict ($operands^ `:` type($operands))?\n
Returns the given values from the region and back to the host code.
Traits: AlwaysSpeculatableImplTrait, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_14","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#streamable-call-ops","title":"Streamable call ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowcall-flowcallop","title":"flow.call
(Flow::CallOp)","text":"Calls a streamable external host function
Syntax:
operation ::= `flow.call` $callee\n `(` $arguments `)` attr-dict `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n
Calls a function taking/returning tensor values with stream semantics. Tensors have their shapes captured and may be tied to denote in-place operations. Asynchronous calls must have no side-effects.
Note that returned tensors must have their shapes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the shape (avoid at all costs!) a separate func.call can be used outside of the stream to do so. If shapes are unknowable until the operation is performed it should be made as a normal asynchronous host call with 'coarse-fences' instead.
Traits: AttrSizedOperandSegments
Interfaces: CallOpInterface, FLOW_StreamableOp, SymbolUserOpInterface, TiedOpInterface, Util_ShapeAwareOp
"},{"location":"reference/mlir-dialects/Flow/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
::mlir::FlatSymbolRefAttrflat symbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/Flow/#operands_15","title":"Operands:","text":"Operand Description arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_17","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Flow/#flowfunc-flowfuncop","title":"flow.func
(Flow::FuncOp)","text":"Streamable function declaration
Syntax:
operation ::= `flow.func` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n ``\n custom<ShapedFunctionSignature>($function_type,\n $tied_operands,\n $arg_attrs,\n $res_attrs)\n attr-dict-with-keyword\n ($body^)?\n
Declares a function that can be called as an asynchronous streaming operation via flow.call
. Today only external functions are allowed.
Traits: IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/Flow/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type tied_operands
::mlir::ArrayAttr64-bit integer array attribute sym_visibility
::mlir::StringAttrstring attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Flow/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_dag_root-flowdispatchworkgroupcountfromdagrootop","title":"flow.dispatch.workgroup_count_from_dag_root
(Flow::DispatchWorkgroupCountFromDagRootOp)","text":"Workgroup count computed based on iteration range of the root of the DAG for ops within the dispatch.
Syntax:
operation ::= `flow.dispatch.workgroup_count_from_dag_root` attr-dict $operands\n
When using tile + distribution of the root of the DAG (Directed Acyclic Graph) of ops within the dispatch to split the work amongst workgroups. The workload captured is the size of the iteration space of the root of the DAG. This op represents the computation that given the workload returns the number of workgroups to use. The backends are responsible for lowering this op into actual computation (typically based on the tile sizes used to tile and distribute the root of the DAG).
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_16","title":"Operands:","text":"Operand Description operands
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_18","title":"Results:","text":"Result Description x
index y
index z
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkgroup_count_from_slice-flowdispatchworkgroupcountfromsliceop","title":"flow.dispatch.workgroup_count_from_slice
(Flow::DispatchWorkgroupCountFromSliceOp)","text":"Place holder to signify default workgroup count calculation.
Syntax:
operation ::= `flow.dispatch.workgroup_count_from_slice` attr-dict $operands\n
The default computation of the number of workgroups (or workgroup count) assumes that the dispatch + captured values is enough to compute the workgroup count. It does so by using a program slice of the values within the dispatch that represent the number of workgroups when available within the dispatch. Currently the arguments of index types captured by the flow.dispatch.workgroups
are treated as the workload for the operation. It is a requirement that the slice of the program that computes the number of workgroups will need to have its leaves be these captured values.
TODO: This could be generalized in future to allow the slices to encompass arbitrary computation. The computation of the workgroup count can then be done on the device itself, if this is data dependent. In such cases the workload could be more than just values of index types.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_17","title":"Operands:","text":"Operand Description operands
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_19","title":"Results:","text":"Result Description x
index y
index z
index"},{"location":"reference/mlir-dialects/Flow/#flowdispatchworkloadordinal-flowdispatchworkloadordinalop","title":"flow.dispatch.workload.ordinal
(Flow::DispatchWorkloadOrdinalOp)","text":"Annotates the values captured as workload within the body of flow.dispatch.workgroups
op.
Syntax:
operation ::= `flow.dispatch.workload.ordinal` attr-dict $operand `,` $ordinal `:` type($operand)\n
The arguments that represent the captured/returned values of the flow.dispatch.workgroups, i.e. the signature of the body of the op is not preserved during IREEs compilation. Since the workloads are derived from the operands captured by the operation, this op denotes the values captured as workloads. This can be used in the backends to map back to the workload values while materializing the workgroup count computation.
TODO: Find a better way to represent this information, either by somehow propagating the signature of the created dispatch workgroup op through the compilation stack until the codegen backends, or as a separate list/attribute that can be plumbed through without using explicit ops.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription ordinal
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Flow/#operands_18","title":"Operands:","text":"Operand Description operand
index"},{"location":"reference/mlir-dialects/Flow/#results_20","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Flow/#flowtensoralloca-flowtensorallocaop","title":"flow.tensor.alloca
(Flow::TensorAllocaOp)","text":"An empty tensor allocation with undefined contents
Syntax:
operation ::= `flow.tensor.alloca` `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a new transient tensor allocation with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read. The resulting tensor may be long-lived and allocated as part of a dedicated allocation. Prefer using flow.tensor.empty
whenever possible as this op disables nearly all allocation-related optimizations performed by the compiler. The presence of this op is often an indication of an improper lowering.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Flow/#operands_19","title":"Operands:","text":"Operand Description result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_21","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorbitcast-flowtensorbitcastop","title":"flow.tensor.bitcast
(Flow::TensorBitCastOp)","text":"Bitcasts a tensor
Syntax:
operation ::= `flow.tensor.bitcast` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Bitcasts a tensor to a new type without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_20","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_22","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorclone-flowtensorcloneop","title":"flow.tensor.clone
(Flow::TensorCloneOp)","text":"Performs a full tensor clone operation
Syntax:
operation ::= `flow.tensor.clone` $operand `:` type($result) (`{` $argument_dims^ `}`)?\n attr-dict-with-keyword\n
Clones the input tensor into an identical output tensor.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_21","title":"Operands:","text":"Operand Description operand
ranked tensor of any type values argument_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_23","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorconstant-flowtensorconstantop","title":"flow.tensor.constant
(Flow::TensorConstantOp)","text":"Tensor constant that can have dynamic dimensions
Syntax:
operation ::= `flow.tensor.constant` $value attr-dict `->` type($result)\n
Allows specifying a constant where the return value can erase shape information. This operation is declared as having side effects and has no folder, so will not be optimized away by the compiler. The underlying shape information should be hidden from the compiler and resolved at runtime.
%c = flow.tensor.constant tensor<2x2xf32> -> tensor<?x?xf32>\n%res = math.absf %c : tensor<?x?xf32>\n
"},{"location":"reference/mlir-dialects/Flow/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::ElementsAttrconstant vector/tensor attribute"},{"location":"reference/mlir-dialects/Flow/#results_24","title":"Results:","text":"Result Description result
tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorempty-flowtensoremptyop","title":"flow.tensor.empty
(Flow::TensorEmptyOp)","text":"An empty tensor carrying metadata but no contents
Syntax:
operation ::= `flow.tensor.empty` `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor with undefined contents. Subsequent writes must populate any ranges of the tensor that are later read.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_22","title":"Operands:","text":"Operand Description result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_25","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorload-flowtensorloadop","title":"flow.tensor.load
(Flow::TensorLoadOp)","text":"Loads a value from a tensor element
Syntax:
operation ::= `flow.tensor.load` $source (`[` $indices^ `]`)? `:`\n type($source) (`{` $source_dims^ `}`)?\n attr-dict-with-keyword\n
Returns the element at the given location from within the tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_23","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_26","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorreshape-flowtensorreshapeop","title":"flow.tensor.reshape
(Flow::TensorReshapeOp)","text":"Reshapes a tensor
Syntax:
operation ::= `flow.tensor.reshape` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Reshapes a tensor to a new shape without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_24","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_27","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorslice-flowtensorsliceop","title":"flow.tensor.slice
(Flow::TensorSliceOp)","text":"Slices out a subregion of a tensor
Syntax:
operation ::= `flow.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Clones a subregion of a tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_25","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index start_indices
variadic of index lengths
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_28","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorsplat-flowtensorsplatop","title":"flow.tensor.splat
(Flow::TensorSplatOp)","text":"Splats a value into a shaped tensor
Syntax:
operation ::= `flow.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor initialized to the given primitive value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_26","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type result_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_29","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensorstore-flowtensorstoreop","title":"flow.tensor.store
(Flow::TensorStoreOp)","text":"Stores a value into a tensor element
Syntax:
operation ::= `flow.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n type($target) (`{` $target_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor with the element at the given index set to the given value.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_27","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type or vector of any type values target
ranked tensor of any type values target_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_30","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortie_shape-flowtensortieshapeop","title":"flow.tensor.tie_shape
(Flow::TensorTieShapeOp)","text":"Ties a runtime shape to a tensor value
Syntax:
operation ::= `flow.tensor.tie_shape` $operand attr-dict\n `:` type($result) (`{` $dynamic_dims^ `}`)?\n
Metadata op used to tie tensors with their runtime-computed dynamic dimensions. This only exists transiently in the IR as a witness to shape calculations and is removed during lowering.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_28","title":"Operands:","text":"Operand Description operand
ranked tensor of any type values dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_31","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#flowtensortrace-flowtensortraceop","title":"flow.tensor.trace
(Flow::TensorTraceOp)","text":"Traces one or more tensor values at runtime
Syntax:
operation ::= `flow.tensor.trace` $key `=` `[`\n custom<ShapedOperandList>($values, type($values), $value_dims)\n `]` attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.
Traits: AttrSizedOperandSegments
Interfaces: ShapeAwareOpInterface
"},{"location":"reference/mlir-dialects/Flow/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Flow/#operands_29","title":"Operands:","text":"Operand Description values
variadic of ranked tensor of any type values value_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#flowtensorupdate-flowtensorupdateop","title":"flow.tensor.update
(Flow::TensorUpdateOp)","text":"Updates a tensor with the contents of another tensor
Syntax:
operation ::= `flow.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n type($update) (`{` $update_dims^ `}`)? `->`\n custom<ShapedTiedResult>(type($result), $target_dims)\n attr-dict-with-keyword\n
Updates the target tensor with the contents of the update tensor at the given offset indices.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, FLOW_StreamableOp, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Flow/#operands_30","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index start_indices
variadic of index update
ranked tensor of any type values update_dims
variadic of index"},{"location":"reference/mlir-dialects/Flow/#results_32","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/Flow/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/Flow/#dummyattr","title":"DummyAttr","text":"Syntax: #flow.dummy
"},{"location":"reference/mlir-dialects/Flow/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/Flow/#dispatchtensor","title":"dispatch.tensor","text":"A placeholder for a dispatch region input/output operand. This can be used to query the metadata about the tensor (such as its shape) as well as both load and store from the backing tensor representation.
"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_1","title":"dispatch.tensor","text":"A placeholder for a dispatch region input operand. This can be used to query the metadata about the tensor (such as its shape) as well as load from the backing tensor representation.
"},{"location":"reference/mlir-dialects/Flow/#dispatchtensor_2","title":"dispatch.tensor","text":"A placeholder for a dispatch region output operand. This can be used to query the metadata about the tensor (such as its shape) as well as store to the backing tensor representation.
"},{"location":"reference/mlir-dialects/Flow/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/Flow/#channeltype","title":"ChannelType","text":"a collective communication channel
Syntax: !flow.channel
Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.
In programs that have already been partitioned prior to being compiled there will often exist only one channel and flow.channel.default
can be used to reference it. In programs that model SPMD behavior internally channels can be created or provided by hosting applications.
"},{"location":"reference/mlir-dialects/Flow/#dummytype","title":"DummyType","text":"Syntax: !flow.dummy
"},{"location":"reference/mlir-dialects/HAL/","title":"HAL","text":""},{"location":"reference/mlir-dialects/HAL/#hal-dialect","title":"'hal' Dialect","text":"A dialect representing operations against the IREE HAL.
This can be thought of as a Vulkan-like model with all of the graphics bits chopped out.
The type set is limited to those that can be represented in the IREE HAL design: buffers and views, synchronization primitives like semaphores, and command buffers. The intent is that if a device could implement the HAL interface the sequencer ops could run on that device, such as being able to run on a GPU via indirect command buffers.
Though this is mostly a 1:1 mapping to the iree::hal API there are some methods omitted as they are not likely to be needed in IR. It's assumed that either sequencer interfaces will encapsulate the logic (such as device resolution) or that certain features are unsafe to expose to user-defined input.
- 'hal' Dialect
- Operation definition
- Allocator ops
- hal.allocator.allocate (HAL::AllocatorAllocateOp)
- hal.allocator.import (HAL::AllocatorImportOp)
- Buffer ops
- hal.buffer.assert (HAL::BufferAssertOp)
- hal.buffer.length (HAL::BufferLengthOp)
- hal.buffer.load (HAL::BufferLoadOp)
- hal.buffer.store (HAL::BufferStoreOp)
- hal.buffer.subspan (HAL::BufferSubspanOp)
- Buffer view ops
- hal.buffer_view.assert (HAL::BufferViewAssertOp)
- hal.buffer_view.buffer (HAL::BufferViewBufferOp)
- hal.buffer_view.create (HAL::BufferViewCreateOp)
- hal.buffer_view.dim (HAL::BufferViewDimOp)
- hal.buffer_view.element_type (HAL::BufferViewElementTypeOp)
- hal.buffer_view.encoding_type (HAL::BufferViewEncodingTypeOp)
- hal.buffer_view.rank (HAL::BufferViewRankOp)
- hal.buffer_view.trace (HAL::BufferViewTraceOp)
- Channel ops
- hal.channel.create (HAL::ChannelCreateOp)
- hal.channel.rank_and_count (HAL::ChannelRankAndCountOp)
- hal.channel.split (HAL::ChannelSplitOp)
- Command buffer ops
- hal.command_buffer.begin_debug_group (HAL::CommandBufferBeginDebugGroupOp)
- hal.command_buffer.collective (HAL::CommandBufferCollectiveOp)
- hal.command_buffer.copy_buffer (HAL::CommandBufferCopyBufferOp)
- hal.command_buffer.create (HAL::CommandBufferCreateOp)
- hal.command_buffer.device (HAL::CommandBufferDeviceOp)
- hal.command_buffer.dispatch.indirect (HAL::CommandBufferDispatchIndirectOp)
- hal.command_buffer.dispatch.indirect.symbol (HAL::CommandBufferDispatchIndirectSymbolOp)
- hal.command_buffer.dispatch (HAL::CommandBufferDispatchOp)
- hal.command_buffer.dispatch.symbol (HAL::CommandBufferDispatchSymbolOp)
- hal.command_buffer.end_debug_group (HAL::CommandBufferEndDebugGroupOp)
- hal.command_buffer.execution_barrier (HAL::CommandBufferExecutionBarrierOp)
- hal.command_buffer.fill_buffer (HAL::CommandBufferFillBufferOp)
- hal.command_buffer.finalize (HAL::CommandBufferFinalizeOp)
- hal.command_buffer.push_constants (HAL::CommandBufferPushConstantsOp)
- hal.command_buffer.push_descriptor_set (HAL::CommandBufferPushDescriptorSetOp)
- Descriptor set layout ops
- hal.descriptor_set_layout.create (HAL::DescriptorSetLayoutCreateOp)
- hal.descriptor_set_layout.lookup (HAL::DescriptorSetLayoutLookupOp)
- Device ops
- hal.device.allocator (HAL::DeviceAllocatorOp)
- hal.device.query (HAL::DeviceQueryOp)
- hal.device.queue.alloca (HAL::DeviceQueueAllocaOp)
- hal.device.queue.dealloca (HAL::DeviceQueueDeallocaOp)
- hal.device.queue.execute (HAL::DeviceQueueExecuteOp)
- hal.device.queue.flush (HAL::DeviceQueueFlushOp)
- hal.device.queue.read (HAL::DeviceQueueReadOp)
- hal.device.queue.write (HAL::DeviceQueueWriteOp)
- hal.return (HAL::ReturnOp)
- Executable ops
- hal.executable.binary (HAL::ExecutableBinaryOp)
- hal.executable.calculate_workgroups (HAL::ExecutableCalculateWorkgroupsOp)
- hal.executable.condition (HAL::ExecutableConditionOp)
- hal.executable.constant.block (HAL::ExecutableConstantBlockOp)
- hal.executable.constant.load (HAL::ExecutableConstantLoadOp)
- hal.executable.create (HAL::ExecutableCreateOp)
- hal.executable_end (HAL::ExecutableEndOp)
- hal.executable.export (HAL::ExecutableExportOp)
- hal.executable.lookup (HAL::ExecutableLookupOp)
- hal.executable (HAL::ExecutableOp)
- hal.executable.source_end (HAL::ExecutableSourceEndOp)
- hal.executable.source (HAL::ExecutableSourceOp)
- hal.executable.variant_end (HAL::ExecutableVariantEndOp)
- hal.executable.variant (HAL::ExecutableVariantOp)
- Experimental ops
- hal.ex.file.from_memory (HAL::ExFileFromMemoryOp)
- hal.ex.shared_device (HAL::ExSharedDeviceOp)
- Fence ops
- hal.fence.await (HAL::FenceAwaitOp)
- hal.fence.create (HAL::FenceCreateOp)
- hal.fence.fail (HAL::FenceFailOp)
- hal.fence.join (HAL::FenceJoinOp)
- hal.fence.query (HAL::FenceQueryOp)
- hal.fence.signal (HAL::FenceSignalOp)
- Instrument ops
- hal.instrument.memory.load (HAL::InstrumentMemoryLoadOp)
- hal.instrument.memory.store (HAL::InstrumentMemoryStoreOp)
- hal.instrument.print (HAL::InstrumentPrintOp)
- hal.instrument.value (HAL::InstrumentValueOp)
- hal.instrument.workgroup (HAL::InstrumentWorkgroupOp)
- Interface ops
- hal.interface.binding.subspan (HAL::InterfaceBindingSubspanOp)
- hal.interface.constant.load (HAL::InterfaceConstantLoadOp)
- hal.interface.workgroup.count (HAL::InterfaceWorkgroupCountOp)
- hal.interface.workgroup.id (HAL::InterfaceWorkgroupIDOp)
- hal.interface.workgroup.size (HAL::InterfaceWorkgroupSizeOp)
- Pipeline layout ops
- hal.pipeline_layout.create (HAL::PipelineLayoutCreateOp)
- hal.pipeline_layout.lookup (HAL::PipelineLayoutLookupOp)
- Pseudo Ops
- hal.dispatch.extern (HAL::DispatchExternOp)
- hal.tensor.barrier (HAL::TensorBarrierOp)
- hal.tensor.export (HAL::TensorExportOp)
- hal.tensor.import (HAL::TensorImportOp)
- Attribute definition
- AffinityQueueAttr
- CollectiveAttr
- DescriptorSetBindingAttr
- DescriptorSetLayoutAttr
- DescriptorTypeAttr
- DeviceMatchArchitectureAttr
- DeviceMatchExecutableFormatAttr
- DeviceMatchFeatureAttr
- DeviceMatchIDAttr
- DeviceTargetAttr
- ExecutableObjectAttr
- ExecutableObjectsAttr
- ExecutableTargetAttr
- InterfaceBindingAttr
- MatchAllAttr
- MatchAlwaysAttr
- MatchAnyAttr
- PipelineLayoutAttr
- Type constraint definition
- allocator
- buffer
- buffer_view
- collective.channel
- command_buffer
- descriptor_set_layout
- device
- event
- executable
- fence
- buffer
- pipeline_layout
"},{"location":"reference/mlir-dialects/HAL/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/HAL/#allocator-ops","title":"Allocator ops","text":"Ops for !hal.allocator
/ iree_hal_allocator_t
.
"},{"location":"reference/mlir-dialects/HAL/#halallocatorallocate-halallocatorallocateop","title":"hal.allocator.allocate
(HAL::AllocatorAllocateOp)","text":"Empty buffer allocation operation
Syntax:
operation ::= `hal.allocator.allocate` `<` $allocator `:` type($allocator) `>`\n `affinity` `(` $queue_affinity `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` custom<SizeAwareType>(type($result), $result_size)\n attr-dict-with-keyword\n
Allocates a buffer of the given size from the allocator. The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands","title":"Operands:","text":"Operand Description allocator
allocator queue_affinity
64-bit signless integer result_size
index"},{"location":"reference/mlir-dialects/HAL/#results","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#halallocatorimport-halallocatorimportop","title":"hal.allocator.import
(HAL::AllocatorImportOp)","text":"Allocator-supported host buffer import operation
Syntax:
operation ::= `hal.allocator.import` `<` $allocator `:` type($allocator) `>`\n `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n `affinity` `(` $queue_affinity `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` type($did_import) `,` type($result)\n attr-dict-with-keyword\n
Tries importing host memory backed by the given byte buffer into a device accessible !hal.buffer
. The returned buffer may be host-only and not directly usable on devices. If the mapping cannot be completed (such as trying to map the host memory as device-local on devices with discrete memory) then did_import
will indicate that the returned buffer is null.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_1","title":"Operands:","text":"Operand Description allocator
allocator queue_affinity
64-bit signless integer source
a reference counted byte buffer offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#results_1","title":"Results:","text":"Result Description did_import
1-bit signless integer result
buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-ops","title":"Buffer ops","text":"Ops for !hal.buffer
/ iree_hal_buffer_t
.
"},{"location":"reference/mlir-dialects/HAL/#halbufferassert-halbufferassertop","title":"hal.buffer.assert
(HAL::BufferAssertOp)","text":"Buffer compatibility assertion
Syntax:
operation ::= `hal.buffer.assert` `<` $buffer `:` type($buffer) `>`\n `message` `(` $message `)`\n `allocator` `(` $allocator `:` type($allocator) `)`\n `minimum_length` `(` $minimum_length `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n attr-dict-with-keyword\n
Asserts that the buffer is compatible with the given allocator and usage. Program execution will abort as if std.assert
had been used.
This only checks that the buffer can be used and not that it matches the given parameters exactly. Buffers may be from other allocators so long as the allocators are compatible (devices can address each other's memory), the type and usage contain all the requested bits (having more bits is ok), and the length is at least the requested minimum (as padding may be ignored).
"},{"location":"reference/mlir-dialects/HAL/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_2","title":"Operands:","text":"Operand Description buffer
buffer allocator
allocator minimum_length
index"},{"location":"reference/mlir-dialects/HAL/#halbufferlength-halbufferlengthop","title":"hal.buffer.length
(HAL::BufferLengthOp)","text":"Buffer byte length accessor
Syntax:
operation ::= `hal.buffer.length` `<` $buffer `:` type($buffer) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the allocated size of a buffer in bytes. May be less than the underlying buffer allocation if this is a subspan or view into another buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_3","title":"Operands:","text":"Operand Description buffer
buffer"},{"location":"reference/mlir-dialects/HAL/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halbufferload-halbufferloadop","title":"hal.buffer.load
(HAL::BufferLoadOp)","text":"Buffer element load operation
Syntax:
operation ::= `hal.buffer.load` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `]`\n `:` type($result)\n attr-dict-with-keyword\n
Loads a value from a buffer by mapping it.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_4","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index"},{"location":"reference/mlir-dialects/HAL/#results_3","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/HAL/#halbufferstore-halbufferstoreop","title":"hal.buffer.store
(HAL::BufferStoreOp)","text":"Buffer element store operation
Syntax:
operation ::= `hal.buffer.store` `<` $target_buffer `:` type($target_buffer) `>`\n `` `[` $target_offset `]`\n `value` `(` $value `:` type($value) `)`\n attr-dict-with-keyword\n
Stores a value into a buffer by mapping it.
"},{"location":"reference/mlir-dialects/HAL/#operands_5","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type or vector of any type values target_buffer
buffer target_offset
index"},{"location":"reference/mlir-dialects/HAL/#halbuffersubspan-halbuffersubspanop","title":"hal.buffer.subspan
(HAL::BufferSubspanOp)","text":"Buffer subspan operation
Syntax:
operation ::= `hal.buffer.subspan` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a reference to a subspan of the buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SizeAwareOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_6","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#results_4","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#buffer-view-ops","title":"Buffer view ops","text":"Ops for !hal.buffer_view
/ iree_hal_buffer_view_t
.
"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewassert-halbufferviewassertop","title":"hal.buffer_view.assert
(HAL::BufferViewAssertOp)","text":"Buffer view contents assertion
Syntax:
operation ::= `hal.buffer_view.assert` `<` $buffer_view `:` type($buffer_view) `>`\n `message` `(` $message `)`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n attr-dict-with-keyword\n
Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if std.assert
had been used.
"},{"location":"reference/mlir-dialects/HAL/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_7","title":"Operands:","text":"Operand Description buffer_view
buffer_view element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewbuffer-halbufferviewbufferop","title":"hal.buffer_view.buffer
(HAL::BufferViewBufferOp)","text":"Buffer view buffer accessor
Syntax:
operation ::= `hal.buffer_view.buffer` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the buffer backing this view's contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_8","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_5","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewcreate-halbufferviewcreateop","title":"hal.buffer_view.create
(HAL::BufferViewCreateOp)","text":"Buffer view reference initializer
Syntax:
operation ::= `hal.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `,` $source_length `]`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_9","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index source_length
index element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_6","title":"Results:","text":"Result Description result
buffer_view"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewdim-halbufferviewdimop","title":"hal.buffer_view.dim
(HAL::BufferViewDimOp)","text":"Buffer view dimension value query
Syntax:
operation ::= `hal.buffer_view.dim` `<` $buffer_view `:` type($buffer_view) `>`\n `` `[` $index `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the value of the given dimension.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_10","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_7","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewelement_type-halbufferviewelementtypeop","title":"hal.buffer_view.element_type
(HAL::BufferViewElementTypeOp)","text":"Buffer view element type query
Syntax:
operation ::= `hal.buffer_view.element_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the element type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_11","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_8","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewencoding_type-halbufferviewencodingtypeop","title":"hal.buffer_view.encoding_type
(HAL::BufferViewEncodingTypeOp)","text":"Buffer view encoding type query
Syntax:
operation ::= `hal.buffer_view.encoding_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the encoding type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_12","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_9","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewrank-halbufferviewrankop","title":"hal.buffer_view.rank
(HAL::BufferViewRankOp)","text":"Buffer view rank query
Syntax:
operation ::= `hal.buffer_view.rank` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the rank of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_13","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_10","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halbuffer_viewtrace-halbufferviewtraceop","title":"hal.buffer_view.trace
(HAL::BufferViewTraceOp)","text":"Trace value(s) operation
Syntax:
operation ::= `hal.buffer_view.trace` $key `=`\n $operands `:` type($operands)\n attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. The key is informational only and useful for titling/marking specific sets of buffers for easier searching.
"},{"location":"reference/mlir-dialects/HAL/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_14","title":"Operands:","text":"Operand Description operands
variadic of buffer_view"},{"location":"reference/mlir-dialects/HAL/#channel-ops","title":"Channel ops","text":"Ops for !hal.channel
/ iree_hal_channel_t
.
"},{"location":"reference/mlir-dialects/HAL/#halchannelcreate-halchannelcreateop","title":"hal.channel.create
(HAL::ChannelCreateOp)","text":"Creates a new channel for collective communication
Syntax:
operation ::= `hal.channel.create` `device` `(` $device `:` type($device) `)`\n `affinity` `(` $queue_affinity `)`\n `flags` `(` $flags `)`\n `id` `(` $id `)`\n `group` `(` $group `)`\n `rank` `(` $rank `)`\n `count` `(` $count `)`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a new channel with the given rank associated with the given device queue. Collective operations using this channel must only be submitted on compatible queues.
The group and ID are optional and may be null. A rank or count of -1 can be used to indicate a default inherited from the environment or device configuration.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_15","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer id
a reference counted byte buffer group
a reference counted byte buffer rank
32-bit signless integer count
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_11","title":"Results:","text":"Result Description result
collective.channel"},{"location":"reference/mlir-dialects/HAL/#halchannelrank_and_count-halchannelrankandcountop","title":"hal.channel.rank_and_count
(HAL::ChannelRankAndCountOp)","text":"Returns the rank of the local participant in the group
Syntax:
operation ::= `hal.channel.rank_and_count` `<` $channel `:` type($channel) `>`\n `:` type($rank) `,` type($count)\n attr-dict-with-keyword\n
Returns the rank the channel represents as a participant in a collective group in [0, count)
and the total participant count.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_16","title":"Operands:","text":"Operand Description channel
collective.channel"},{"location":"reference/mlir-dialects/HAL/#results_12","title":"Results:","text":"Result Description rank
32-bit signless integer count
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halchannelsplit-halchannelsplitop","title":"hal.channel.split
(HAL::ChannelSplitOp)","text":"Splits a collective communication channel
Syntax:
operation ::= `hal.channel.split` `<` $channel `:` type($channel) `>`\n `color` `(` $color `)`\n `key` `(` $key `)`\n `flags` `(` $flags `)`\n `:` type($result)\n attr-dict-with-keyword\n
Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_17","title":"Operands:","text":"Operand Description channel
collective.channel color
32-bit signless integer key
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_13","title":"Results:","text":"Result Description result
collective.channel"},{"location":"reference/mlir-dialects/HAL/#command-buffer-ops","title":"Command buffer ops","text":"Ops for !hal.command_buffer
/ iree_hal_command_buffer_t
.
"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferbegin_debug_group-halcommandbufferbegindebuggroupop","title":"hal.command_buffer.begin_debug_group
(HAL::CommandBufferBeginDebugGroupOp)","text":"Pushes a command buffer debug group label
Syntax:
operation ::= `hal.command_buffer.begin_debug_group` `<` $command_buffer `:` type($command_buffer) `>`\n `label` `(` $label `)`\n attr-dict-with-keyword\n
Pushes a new debug group with the given label. All commands between this and a mandatory matching call to hal.command_buffer.end_debug_group
will be grouped together with the given label.
"},{"location":"reference/mlir-dialects/HAL/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription label
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_18","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercollective-halcommandbuffercollectiveop","title":"hal.command_buffer.collective
(HAL::CommandBufferCollectiveOp)","text":"Command buffer collective dispatch recording operation
Syntax:
operation ::= `hal.command_buffer.collective` `<` $command_buffer `:` type($command_buffer) `>`\n `channel` `(` $channel `:` type($channel) `)`\n `op` `(` $op `)`\n (`param` `(` $param^ `:` type($param) `)`)?\n (`send` `(` $send_buffer^ `:` type($send_buffer) `)`\n `` `[` $send_offset `,` $send_length `]`)?\n (`recv` `(` $recv_buffer^ `:` type($recv_buffer) `)`\n `` `[` $recv_offset `,` $recv_length `]`)?\n `count` `(` $element_count `)`\n attr-dict-with-keyword\n
Dispatches a collective operation defined by op using the given buffers.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/HAL/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription op
::mlir::iree_compiler::IREE::HAL::CollectiveAttrcollective operation and specification"},{"location":"reference/mlir-dialects/HAL/#operands_19","title":"Operands:","text":"Operand Description command_buffer
command_buffer channel
collective.channel element_count
index param
32-bit signless integer send_buffer
buffer send_offset
index send_length
index recv_buffer
buffer recv_offset
index recv_length
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercopy_buffer-halcommandbuffercopybufferop","title":"hal.command_buffer.copy_buffer
(HAL::CommandBufferCopyBufferOp)","text":"Command buffer buffer copy recording operation
Syntax:
operation ::= `hal.command_buffer.copy_buffer` `<` $command_buffer `:` type($command_buffer) `>`\n `source` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `]`\n `target` `(` $target_buffer `:` type($target_buffer) `)`\n `` `[` $target_offset `]`\n `length` `(` $length `)`\n attr-dict-with-keyword\n
Copies a range of one buffer to another.
"},{"location":"reference/mlir-dialects/HAL/#operands_20","title":"Operands:","text":"Operand Description command_buffer
command_buffer source_buffer
buffer source_offset
index target_buffer
buffer target_offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_buffercreate-halcommandbuffercreateop","title":"hal.command_buffer.create
(HAL::CommandBufferCreateOp)","text":"Command buffer allocation operation
Syntax:
operation ::= `hal.command_buffer.create` `device` `(` $device `:` type($device) `)`\n `mode` `(` $modes `)`\n `categories` `(` $command_categories `)`\n (`bindings` `(` $binding_capacity^ `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Returns a command buffer from the device pool ready to begin recording.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription modes
mlir::iree_compiler::IREE::HAL::CommandBufferModeBitfieldAttrvalid CommandBufferMode command_categories
mlir::iree_compiler::IREE::HAL::CommandCategoryBitfieldAttrvalid CommandCategory"},{"location":"reference/mlir-dialects/HAL/#operands_21","title":"Operands:","text":"Operand Description device
device binding_capacity
index"},{"location":"reference/mlir-dialects/HAL/#results_14","title":"Results:","text":"Result Description result
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdevice-halcommandbufferdeviceop","title":"hal.command_buffer.device
(HAL::CommandBufferDeviceOp)","text":"Command buffer device query operation
Syntax:
operation ::= `hal.command_buffer.device` `<` $command_buffer `:` type($command_buffer) `>`\n `:` type($device)\n attr-dict-with-keyword\n
Used during conversion to access the device used to create a command buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_22","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#results_15","title":"Results:","text":"Result Description device
device"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirect-halcommandbufferdispatchindirectop","title":"hal.command_buffer.dispatch.indirect
(HAL::CommandBufferDispatchIndirectOp)","text":"Command buffer indirect dispatch recording operation
Syntax:
operation ::= `hal.command_buffer.dispatch.indirect` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $executable `:` type($executable) `)`\n `` `[` $entry_point `]`\n `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n `` `[` $workgroups_offset `]`\n attr-dict-with-keyword\n
Dispatches an execution request with the dispatch parameters loaded from the given buffer.
"},{"location":"reference/mlir-dialects/HAL/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_23","title":"Operands:","text":"Operand Description command_buffer
command_buffer executable
executable workgroups_buffer
buffer workgroups_offset
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchindirectsymbol-halcommandbufferdispatchindirectsymbolop","title":"hal.command_buffer.dispatch.indirect.symbol
(HAL::CommandBufferDispatchIndirectSymbolOp)","text":"Command buffer indirect dispatch recording operation, using symbolref
Syntax:
operation ::= `hal.command_buffer.dispatch.indirect.symbol` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $entry_point `)`\n `workgroups` `(` $workgroups_buffer `:` type($workgroups_buffer) `)`\n `` `[` $workgroups_offset `]`\n attr-dict-with-keyword\n
Dispatches an execution request with the dispatch parameters loaded from the given buffer, using a nested symbol reference to the entry point.
hal.command_buffer.dispatch.indirect.symbol %cmd, @executable::@target::@entry,\n workgroups = %buffer[%offset]\n
"},{"location":"reference/mlir-dialects/HAL/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_24","title":"Operands:","text":"Operand Description command_buffer
command_buffer workgroups_buffer
buffer workgroups_offset
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatch-halcommandbufferdispatchop","title":"hal.command_buffer.dispatch
(HAL::CommandBufferDispatchOp)","text":"Command buffer dispatch recording operation
Syntax:
operation ::= `hal.command_buffer.dispatch` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $executable `:` type($executable) `)`\n `` `[` $entry_point `]`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n attr-dict-with-keyword\n
Dispatches an execution request.
"},{"location":"reference/mlir-dialects/HAL/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HAL/#operands_25","title":"Operands:","text":"Operand Description command_buffer
command_buffer executable
executable workgroup_x
index workgroup_y
index workgroup_z
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferdispatchsymbol-halcommandbufferdispatchsymbolop","title":"hal.command_buffer.dispatch.symbol
(HAL::CommandBufferDispatchSymbolOp)","text":"Command buffer dispatch recording operation, using symbolref
Syntax:
operation ::= `hal.command_buffer.dispatch.symbol` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $entry_point `)`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n attr-dict-with-keyword\n
Dispatches an execution request, using a nested symbol reference to the entry point.
"},{"location":"reference/mlir-dialects/HAL/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_26","title":"Operands:","text":"Operand Description command_buffer
command_buffer workgroup_x
index workgroup_y
index workgroup_z
index"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferend_debug_group-halcommandbufferenddebuggroupop","title":"hal.command_buffer.end_debug_group
(HAL::CommandBufferEndDebugGroupOp)","text":"Pops a command buffer debug group label
Syntax:
operation ::= `hal.command_buffer.end_debug_group` `<` $command_buffer `:` type($command_buffer) `>`\n attr-dict-with-keyword\n
Pops a debug group from the stack.
"},{"location":"reference/mlir-dialects/HAL/#operands_27","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferexecution_barrier-halcommandbufferexecutionbarrierop","title":"hal.command_buffer.execution_barrier
(HAL::CommandBufferExecutionBarrierOp)","text":"Command buffer execution barrier recording operation
Syntax:
operation ::= `hal.command_buffer.execution_barrier` `<` $command_buffer `:` type($command_buffer) `>`\n `source` `(` $source_stage_mask `)`\n `target` `(` $target_stage_mask `)`\n `flags` `(` $flags `)`\n attr-dict-with-keyword\n
Defines an execution dependency between all commands recorded before the barrier and all commands recorded after the barrier. Only the stages provided will be affected.
"},{"location":"reference/mlir-dialects/HAL/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription source_stage_mask
mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage target_stage_mask
mlir::iree_compiler::IREE::HAL::ExecutionStageBitfieldAttrvalid ExecutionStage flags
mlir::iree_compiler::IREE::HAL::ExecutionBarrierFlagBitfieldAttrvalid ExecutionBarrierFlag"},{"location":"reference/mlir-dialects/HAL/#operands_28","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfill_buffer-halcommandbufferfillbufferop","title":"hal.command_buffer.fill_buffer
(HAL::CommandBufferFillBufferOp)","text":"Command buffer buffer fill recording operation
Syntax:
operation ::= `hal.command_buffer.fill_buffer` `<` $command_buffer `:` type($command_buffer) `>`\n `target` `(` $target_buffer `:` type($target_buffer) `)`\n `` `[` $target_offset `,` $length `]`\n `pattern` `(` $pattern `:` type($pattern) `)`\n attr-dict-with-keyword\n
Fills the target buffer with the given repeating value.
"},{"location":"reference/mlir-dialects/HAL/#operands_29","title":"Operands:","text":"Operand Description command_buffer
command_buffer target_buffer
buffer target_offset
index length
index pattern
8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferfinalize-halcommandbufferfinalizeop","title":"hal.command_buffer.finalize
(HAL::CommandBufferFinalizeOp)","text":"Finalizes command buffer recording
Syntax:
operation ::= `hal.command_buffer.finalize` `<` $command_buffer `:` type($command_buffer) `>`\n attr-dict-with-keyword\n
Ends recording into the command buffer and prepares it for submission. No more commands may be recorded into the command buffer.
"},{"location":"reference/mlir-dialects/HAL/#operands_30","title":"Operands:","text":"Operand Description command_buffer
command_buffer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_constants-halcommandbufferpushconstantsop","title":"hal.command_buffer.push_constants
(HAL::CommandBufferPushConstantsOp)","text":"Command buffer push constants operation
Syntax:
operation ::= `hal.command_buffer.push_constants` `<` $command_buffer `:` type($command_buffer) `>`\n `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n `offset` `(` $offset `)`\n `values` `(` `[` $values `]` `)`\n `:` type($values)\n attr-dict-with-keyword\n
Pushes an inline set of constants that can be accessed by subsequent dispatches using a compatible pipeline layout.
Push constants are always 4-byte values and treated as opaque, meaning that they may be bit-casted floats, bit-packed booleans, etc.
"},{"location":"reference/mlir-dialects/HAL/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription offset
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_31","title":"Operands:","text":"Operand Description command_buffer
command_buffer pipeline_layout
pipeline_layout values
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halcommand_bufferpush_descriptor_set-halcommandbufferpushdescriptorsetop","title":"hal.command_buffer.push_descriptor_set
(HAL::CommandBufferPushDescriptorSetOp)","text":"Command buffer descriptor set push binding operation
Syntax:
operation ::= `hal.command_buffer.push_descriptor_set` `<` $command_buffer `:` type($command_buffer) `>`\n `layout` `(` $pipeline_layout `:` type($pipeline_layout) `)`\n `` `[` $set `]`\n `bindings` `(` `[`\n custom<DescriptorSetBindings>($binding_ordinals,\n $binding_buffers,\n type($binding_buffers),\n $binding_offsets,\n $binding_lengths)\n `]` `)`\n attr-dict-with-keyword\n
Pushes an inline-defined descriptor set to the command buffer. The provided buffers may either be HAL buffers or indirect references into the command buffer binding table.
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/HAL/#operands_32","title":"Operands:","text":"Operand Description command_buffer
command_buffer pipeline_layout
pipeline_layout set
index binding_ordinals
variadic of index binding_buffers
variadic of index or buffer binding_offsets
variadic of index binding_lengths
variadic of index"},{"location":"reference/mlir-dialects/HAL/#descriptor-set-layout-ops","title":"Descriptor set layout ops","text":"Ops for !hal.descriptor_set_layout
/ iree_hal_descriptor_set_layout_t
.
"},{"location":"reference/mlir-dialects/HAL/#haldescriptor_set_layoutcreate-haldescriptorsetlayoutcreateop","title":"hal.descriptor_set_layout.create
(HAL::DescriptorSetLayoutCreateOp)","text":"Creates a descriptor set layout
Syntax:
operation ::= `hal.descriptor_set_layout.create` `device` `(` $device `:` type($device) `)`\n `flags` `(` $flags `)`\n `bindings` `(` $bindings `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a descriptor set layout that defines the bindings used within a set. The same descriptor set layout may be shared with many different executable layouts and by doing so some runtime binding overhead when switching between executables that use the same set layouts can be reduced.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlagsAttrvalid DescriptorSetLayout flags bindings
::mlir::ArrayAttrHAL descriptor set layout binding array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_33","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_16","title":"Results:","text":"Result Description result
descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#haldescriptor_set_layoutlookup-haldescriptorsetlayoutlookupop","title":"hal.descriptor_set_layout.lookup
(HAL::DescriptorSetLayoutLookupOp)","text":"Descriptor set layout cache lookup pseudo-op
Syntax:
operation ::= `hal.descriptor_set_layout.lookup` `device` `(` $device `:` type($device) `)`\n `flags` `(` $flags `)`\n `bindings` `(` $bindings `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized descriptor set layout.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlagsAttrvalid DescriptorSetLayout flags bindings
::mlir::ArrayAttrHAL descriptor set layout binding array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_34","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_17","title":"Results:","text":"Result Description result
descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#device-ops","title":"Device ops","text":"Ops for !hal.device
/ iree_hal_device_t
.
"},{"location":"reference/mlir-dialects/HAL/#haldeviceallocator-haldeviceallocatorop","title":"hal.device.allocator
(HAL::DeviceAllocatorOp)","text":"Device allocator accessor operation
Syntax:
operation ::= `hal.device.allocator` `<` $device `:` type($device) `>` `:` type($result) attr-dict-with-keyword\n
Returns the allocator that can be used to allocate buffers compatible with the device.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_35","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_18","title":"Results:","text":"Result Description result
allocator"},{"location":"reference/mlir-dialects/HAL/#haldevicequery-haldevicequeryop","title":"hal.device.query
(HAL::DeviceQueryOp)","text":"Returns a runtime configuration parameter from the device
Syntax:
operation ::= `hal.device.query` `<` $device `:` type($device) `>`\n `key` `(` $category `:` `` `:` $key `)`\n `:` type($ok) `,` type($value)\n (`=` $default_value^)?\n attr-dict-with-keyword\n
Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and if it was the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.
This is roughly equivalent to the sysconf
linux syscall (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent. If there is a HAL match attribute (#hal.device.match.*
) or op (hal.device.match.*
) prefer to use that in order to get compile-time propagation when the target is specified and elide the runtime query and get compile-time verification when a runtime query is required.
Users of the op must check the ok
result before using the value as what set of keys is available may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. Keys should be prefixed with ex.
when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.
Well-known keys:
-
hal.executable.format :: {some format} Returns 1 if the given format is supported by the device loader.
-
hal.device :: concurrency The maximum concurrently executable submissions, mapping roughly to the queue count. The actual concurrency available may be less than this based on dynamic runtime parameters such as power/thermal modes, quota limits, or user choice.
-
hal.dispatch :: concurrency The maximum concurrently executable workgroups for a particular dispatch. The actual concurrency available may be less depending on device state.
Traits: AlwaysSpeculatableImplTrait, HAL_DeviceQuery
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription category
::mlir::StringAttrstring attribute key
::mlir::StringAttrstring attribute default_value
::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/HAL/#operands_36","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_19","title":"Results:","text":"Result Description ok
1-bit signless integer value
any type"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuealloca-haldevicequeueallocaop","title":"hal.device.queue.alloca
(HAL::DeviceQueueAllocaOp)","text":"Allocates a queue-ordered transient buffer
Syntax:
operation ::= `hal.device.queue.alloca` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `pool` `(` $pool `)`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` custom<SizeAwareType>(type($result), $result_size)\n attr-dict-with-keyword\n
Returns a queue-ordered transient buffer that will be available for use when the signal fence is reached. The allocation will not be made until the wait fence has been reached.
The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.
The buffer handle will remain live so long as there are retainers but the contents are undefined before the allocation signal fence has been signaled and after the deallocation wait fence has been reached.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/HAL/#operands_37","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence pool
64-bit signless integer result_size
index"},{"location":"reference/mlir-dialects/HAL/#results_20","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuedealloca-haldevicequeuedeallocaop","title":"hal.device.queue.dealloca
(HAL::DeviceQueueDeallocaOp)","text":"Deallocates a queue-ordered transient buffer
Syntax:
operation ::= `hal.device.queue.dealloca` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `buffer` `(` $buffer `:` type($buffer) `)`\n attr-dict-with-keyword\n
Deallocates a queue-ordered transient buffer. The deallocation will not be made until the wait fence has been reached and once the storage is available for reuse the signal fence will be signaled.
After deallocation the contents of the buffer may still be accessible but will have undefined contents as other operations reuse the memory.
"},{"location":"reference/mlir-dialects/HAL/#operands_38","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence buffer
buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueexecute-haldevicequeueexecuteop","title":"hal.device.queue.execute
(HAL::DeviceQueueExecuteOp)","text":"Enqueues command buffer execution
Syntax:
operation ::= `hal.device.queue.execute` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n (`commands` `(` `[` $command_buffers^ `]` `)`)?\n attr-dict-with-keyword\n
Executes one or more command buffers on a device queue. The command buffers are executed in order as if they were recorded as one. No commands will execute until the wait fence has been reached and the signal fence will be signaled when all commands have completed.
"},{"location":"reference/mlir-dialects/HAL/#operands_39","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence command_buffers
variadic of command_buffer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueflush-haldevicequeueflushop","title":"hal.device.queue.flush
(HAL::DeviceQueueFlushOp)","text":"Flushes locally-pending submissions to the queue
Syntax:
operation ::= `hal.device.queue.flush` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n attr-dict-with-keyword\n
Flushes any locally-pending submissions in the queue. When submitting many queue operations this can be used to eagerly flush earlier submissions while later ones are still being constructed. This may be a no-op.
"},{"location":"reference/mlir-dialects/HAL/#operands_40","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#haldevicequeueread-haldevicequeuereadop","title":"hal.device.queue.read
(HAL::DeviceQueueReadOp)","text":"Reads a segment from a file into a device buffer
Syntax:
operation ::= `hal.device.queue.read` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `source` `(` $source_file `:` type($source_file) `)`\n `` `[` $source_offset `]`\n `target` `(` $target_buffer `:` type($target_buffer) `)`\n `` `[` $target_offset `]`\n `length` `(` $length `)`\n `flags` `(` $flags `)`\n attr-dict-with-keyword\n
Enqueues a file read operation that streams a segment of the source file defined by the source offset and length into the target HAL buffer at the specified target offset. The queue affinity should be set to where the target buffer will be consumed. The source file must have read permission and the target buffer must have transfer-target usage. Read failure will result in propagated semaphore failure or device loss.
"},{"location":"reference/mlir-dialects/HAL/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_41","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_file
buffer source_offset
64-bit signless integer target_buffer
buffer target_offset
index length
index"},{"location":"reference/mlir-dialects/HAL/#haldevicequeuewrite-haldevicequeuewriteop","title":"hal.device.queue.write
(HAL::DeviceQueueWriteOp)","text":"Writes a segment from a device buffer into a file
Syntax:
operation ::= `hal.device.queue.write` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `source` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `]`\n `target` `(` $target_file `:` type($target_file) `)`\n `` `[` $target_offset `]`\n `length` `(` $length `)`\n `flags` `(` $flags `)`\n attr-dict-with-keyword\n
Enqueues a file write operation that streams a segment of the source HAL buffer defined by the source offset and length into the target file at the specified target offset. The queue affinity should be set to where the source buffer was produced. The source buffer must have transfer-source usage and the target file must have write permission. Write failure will result in propagated semaphore failure or device loss.
"},{"location":"reference/mlir-dialects/HAL/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_42","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_buffer
buffer source_offset
index target_file
buffer target_offset
64-bit signless integer length
index"},{"location":"reference/mlir-dialects/HAL/#halreturn-halreturnop","title":"hal.return
(HAL::ReturnOp)","text":"Return from a hal.* region
Syntax:
operation ::= `hal.return` ($operands^ `:` type($operands))? attr-dict\n
Returns the given values from the region and back to the host code.
Traits: Terminator
"},{"location":"reference/mlir-dialects/HAL/#operands_43","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/HAL/#executable-ops","title":"Executable ops","text":"Ops for !hal.executable
/ iree_hal_executable_t
.
"},{"location":"reference/mlir-dialects/HAL/#halexecutablebinary-halexecutablebinaryop","title":"hal.executable.binary
(HAL::ExecutableBinaryOp)","text":"Compiled executable binary data
Syntax:
operation ::= `hal.executable.binary` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n
A compiled executable binary with an optional nested module containing the IR prior to serialization (for debugging).
Traits: HasParent
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute format
::mlir::StringAttrstring attribute data
::mlir::DenseIntElementsAttr8-bit signless integer elements attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablecalculate_workgroups-halexecutablecalculateworkgroupsop","title":"hal.executable.calculate_workgroups
(HAL::ExecutableCalculateWorkgroupsOp)","text":"Calculates workgroup count from workload for an exported function
Syntax:
operation ::= `hal.executable.calculate_workgroups` `device` `(` $device `:` type($device) `)`\n `target` `(` $entry_point `)`\n (`workload` `(` `[` $workload^ `]` `)`)?\n `:` type($workgroup_x) `,` type($workgroup_y) `,` type($workgroup_z)\n attr-dict-with-keyword\n
Calculates the workgroup count (grid XYZ) based on the given workload using the workgroup count calculation region of the target hal.executable.export
op.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_44","title":"Operands:","text":"Operand Description device
device workload
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_21","title":"Results:","text":"Result Description workgroup_x
index workgroup_y
index workgroup_z
index"},{"location":"reference/mlir-dialects/HAL/#halexecutablecondition-halexecutableconditionop","title":"hal.executable.condition
(HAL::ExecutableConditionOp)","text":"Host code to determine if the executable is enabled
Variants are selected based on their target and this optional condition op that returns true if the variant is valid for use on the provided runtime !hal.device
. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.
Traits: IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantblock-halexecutableconstantblockop","title":"hal.executable.constant.block
(HAL::ExecutableConstantBlockOp)","text":"Executable constant block initializer
Initializes one or more constants in the executable constant block by returning one value per identified constant. Each constant block is evaluated on the host prior to instantiating the executable for a given device and allows for the executable to be specialized based on device capabilities and limits.
The keys specified are unique per variant and will be deduplicated across multiple constant blocks when present. They are only used during lowering and will not survive to runtime so they need only have descriptive enough names to avoid collisions and represent the semantics of the value.
Constant values can be loaded in the device code with the hal.executable.constant.load
op:
hal.executable.variant public @target {\n hal.executable.constant.block(%device: !hal.device) -> (i32, i32) as (\"foo\", \"bar\") {\n %0 = hal.device.query<%device> key(\"some.device.prop\")...\n %1 = hal.device.query<%device> key(\"another.device.prop\")...\n hal.return %0, %1 : i32, i32\n }\n builtin.module {\n func @dispatch0() {\n %0 = hal.executable.constant.load \"foo\" : i32\n %1 = hal.executable.constant.load \"bar\" : i32\n return\n }\n }\n}\n
Each target backend will implement the constant initialization and access in a way compatible with its execution model. Examples: - CPU: read-only buffer initialized on load and passed to each dispatch - CUDA: read-only buffer initialized on load and passed to each dispatch - SPIR-V: specialization constants - Metal: function constants - WebGPU: pipeline-overridable constants
Traits: HasParent, IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type keys
::mlir::ArrayAttrarray attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/HAL/#halexecutableconstantload-halexecutableconstantloadop","title":"hal.executable.constant.load
(HAL::ExecutableConstantLoadOp)","text":"Loads a constant value from the executable constant block
Syntax:
operation ::= `hal.executable.constant.load` $key attr-dict `:` type($result)\n
Loads a scalar constant value from the static executable constant block. The value provided by a constant block with the given key will be loaded and bitcast (possibly with truncation or zero-extension) to the result type.
Note that backends are allowed to implement their own mechanisms for referencing constant block values and this is provided only as a default for those not needing special behavior.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#results_22","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halexecutablecreate-halexecutablecreateop","title":"hal.executable.create
(HAL::ExecutableCreateOp)","text":"Creates an executable
Syntax:
operation ::= `hal.executable.create` `device` `(` $device `:` type($device) `)`\n `target` `(` $executable_target `)`\n `layouts` `(` `[` $layouts `]` `)`\n (`constants` `(` `[` $constants^ `]` `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Creates a target-dependent executable cached on the provided device. Entry points contained within the executable can be dispatched using the resulting executable handle.
Depending on the driver creation may take a non-trivial amount of time (such as when JITing/etc). As the cache is internally synchronized callers can issue preparation requests from multiple threads - even for the same executables - and calls will block until preparation completes.
Optional constants provide for specialization of the executable based on runtime-derived parameters.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription executable_target
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_45","title":"Operands:","text":"Operand Description device
device layouts
variadic of pipeline_layout constants
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_23","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable_end-halexecutableendop","title":"hal.executable_end
(HAL::ExecutableEndOp)","text":"Terminator pseudo-op for the executable op
Syntax:
operation ::= `hal.executable_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/HAL/#halexecutableexport-halexecutableexportop","title":"hal.executable.export
(HAL::ExecutableExportOp)","text":"
Executable entry point declaration
An entry point exported by the executable with statically-available information describing the IO interface it uses and other dispatch metadata.
The workgroup_count
region represents the computation that returns the number of workgroups to use in the 3D grid dispatch. The arguments to the region represents the workload as captured by each dispatch. It returns the number of workgroups along x, y, and z.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrsize_t layout
::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification workgroup_size
::mlir::ArrayAttrindex array attribute subgroup_size
::mlir::IntegerAttrsize_t workgroup_local_memory
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablelookup-halexecutablelookupop","title":"hal.executable.lookup
(HAL::ExecutableLookupOp)","text":"Executable cache lookup pseudo-op
Syntax:
operation ::= `hal.executable.lookup` `device` `(` $device `:` type($device) `)`\n `executable` `(` $executable `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription executable
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HAL/#operands_46","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_24","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HAL/#halexecutable-halexecutableop","title":"hal.executable
(HAL::ExecutableOp)","text":"Target-specific executable module
Syntax:
operation ::= `hal.executable` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
An executable module representing a target-specific compiled kernel/shader/etc.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource_end-halexecutablesourceendop","title":"hal.executable.source_end
(HAL::ExecutableSourceEndOp)","text":"Terminator pseudo-op for the executable source op
Syntax:
operation ::= `hal.executable.source_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/HAL/#halexecutablesource-halexecutablesourceop","title":"hal.executable.source
(HAL::ExecutableSourceOp)","text":"
Generic source contents of an executable op
Syntax:
operation ::= `hal.executable.source` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n ``\n $body\n
This is an unspecialized source representation of an executable module without an assigned target. This is useful for hand-authoring executables prior to device specification.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute objects
::mlir::iree_compiler::IREE::HAL::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant_end-halexecutablevariantendop","title":"hal.executable.variant_end
(HAL::ExecutableVariantEndOp)","text":"Terminator pseudo-op for the executable variant op
Syntax:
operation ::= `hal.executable.variant_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/HAL/#halexecutablevariant-halexecutablevariantop","title":"hal.executable.variant
(HAL::ExecutableVariantOp)","text":"
Target-specific variant of an executable op
Syntax:
operation ::= `hal.executable.variant` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n `target` `(` $target `)`\n (`objects` `(` $objects^ `)` )?\n attr-dict-with-keyword\n $body\n
The target IR for the executable. This can be preserved for debugging but is usually removed during transformation.
Variants are selected based on their target and an optional condition op that returns true if the variant is valid for use on the provided runtime !hal.device
. If no variants within an executable are valid then loading will fail at runtime. If multiple variants are valid the first valid one found will be loaded and used for execution.
Traits: HasParent, IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/HAL/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute target
::mlir::iree_compiler::IREE::HAL::ExecutableTargetAttrgeneric executable target specification objects
::mlir::ArrayAttrHAL executable object references"},{"location":"reference/mlir-dialects/HAL/#experimental-ops","title":"Experimental ops","text":"Temporary hack ops expected to be removed in the future.
"},{"location":"reference/mlir-dialects/HAL/#halexfilefrom_memory-halexfilefrommemoryop","title":"hal.ex.file.from_memory
(HAL::ExFileFromMemoryOp)","text":"Creates a file mapped into a byte range of a host buffer
Syntax:
operation ::= `hal.ex.file.from_memory` `device` `(` $device `:` type($device) `)`\n `affinity` `(` $queue_affinity `)`\n `access` `(` $access `)`\n `buffer` `(` $buffer `:` type($buffer) `)`\n `` `[` $offset `for` $length `]`\n `flags` `(` $flags `)`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a file handle that is backed by the given buffer
contents. Behavior is undefined if the buffer contents change while the accesses are in-flight.
Experimental as the exact interface for getting files from module contents still needs iteration. Most hardware APIs require a file descriptor or native platform handle but here we only have host pointers. When memory-mapped some systems allow for retrieval of the platform handle from a virtual address (GetMappedFileNameA/posix_mem_offset) but the APIs are sketchy and likely slow. Instead we should probably have a way to query for a file handle derived from the calling module by stack-walking and asking the VM module for its handle. Until we can figure this out this method will be marked experimental.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription access
mlir::iree_compiler::IREE::HAL::MemoryAccessBitfieldAttrvalid MemoryAccess"},{"location":"reference/mlir-dialects/HAL/#operands_47","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer buffer
a reference counted byte buffer offset
index length
index flags
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_25","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HAL/#halexshared_device-halexshareddeviceop","title":"hal.ex.shared_device
(HAL::ExSharedDeviceOp)","text":"Syntax:
operation ::= `hal.ex.shared_device` attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#results_26","title":"Results:","text":"Result Description result
device"},{"location":"reference/mlir-dialects/HAL/#fence-ops","title":"Fence ops","text":"Ops for !hal.fence
/ iree_hal_fence_t
.
"},{"location":"reference/mlir-dialects/HAL/#halfenceawait-halfenceawaitop","title":"hal.fence.await
(HAL::FenceAwaitOp)","text":"Asynchronous fence wait operation
Syntax:
operation ::= `hal.fence.await` `until` `(` `[` $fences `]` `)`\n `timeout_millis` `(` $timeout_millis `)`\n `:` type($status)\n attr-dict-with-keyword\n
Yields the caller until all fences are reached. Returns the status
of the fence after the wait, with a non-zero value indicating failure.
Traits: Util_YieldPoint
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#operands_48","title":"Operands:","text":"Operand Description timeout_millis
32-bit signless integer fences
variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_27","title":"Results:","text":"Result Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencecreate-halfencecreateop","title":"hal.fence.create
(HAL::FenceCreateOp)","text":"Creates an unsignaled fence
Syntax:
operation ::= `hal.fence.create` `device` `(` $device `:` type($device) `)`\n `flags` `(` $flags `)`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a fence that defines a point in time. By default fences will remain unsignaled unless they are explicitly signaled with hal.fence.signal
or asynchronously signaled by the device by passing them as an operand to queue submission ops.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/HAL/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription flags
mlir::iree_compiler::IREE::HAL::FenceFlagBitfieldAttrvalid FenceFlag"},{"location":"reference/mlir-dialects/HAL/#operands_49","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_28","title":"Results:","text":"Result Description result
fence"},{"location":"reference/mlir-dialects/HAL/#halfencefail-halfencefailop","title":"hal.fence.fail
(HAL::FenceFailOp)","text":"Fence failure operation
Syntax:
operation ::= `hal.fence.fail` `<` $fence `:` type($fence) `>`\n `status` `(` $status `)`\n attr-dict-with-keyword\n
Signals the fence with a failure. The status
will be returned from each timepoint semaphores hal.semaphore.query
and hal.semaphore.signal
for the lifetime of each semaphore.
"},{"location":"reference/mlir-dialects/HAL/#operands_50","title":"Operands:","text":"Operand Description fence
fence status
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencejoin-halfencejoinop","title":"hal.fence.join
(HAL::FenceJoinOp)","text":"Creates a fence from the given timepoints
Syntax:
operation ::= `hal.fence.join` `at` `(` `[` $fences `]` `)`\n `->` type($result)\n attr-dict-with-keyword\n
Returns a fence that joins the input fences as a wait-all operation.
Interfaces: OpAsmOpInterface
"},{"location":"reference/mlir-dialects/HAL/#operands_51","title":"Operands:","text":"Operand Description fences
variadic of fence"},{"location":"reference/mlir-dialects/HAL/#results_29","title":"Results:","text":"Result Description result
fence"},{"location":"reference/mlir-dialects/HAL/#halfencequery-halfencequeryop","title":"hal.fence.query
(HAL::FenceQueryOp)","text":"Fence query operation
Syntax:
operation ::= `hal.fence.query` `<` $fence `:` type($fence) `>`\n `:` type($status)\n attr-dict-with-keyword\n
Queries whether the fence has been reached and its status. Returns OK if the fence has been signaled successfully, DEFERRED if it is unsignaled, and otherwise an error indicating the failure.
"},{"location":"reference/mlir-dialects/HAL/#operands_52","title":"Operands:","text":"Operand Description fence
fence"},{"location":"reference/mlir-dialects/HAL/#results_30","title":"Results:","text":"Result Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#halfencesignal-halfencesignalop","title":"hal.fence.signal
(HAL::FenceSignalOp)","text":"Fence signal operation
Syntax:
operation ::= `hal.fence.signal` `<` $fence `:` type($fence) `>`\n attr-dict-with-keyword\n
Signals the fence to indicate that the timepoints contained have been reached. Waiting work may begin immediately.
"},{"location":"reference/mlir-dialects/HAL/#operands_53","title":"Operands:","text":"Operand Description fence
fence"},{"location":"reference/mlir-dialects/HAL/#instrument-ops","title":"Instrument ops","text":"Ops for !hal.instrument.*
.
"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemoryload-halinstrumentmemoryloadop","title":"hal.instrument.memory.load
(HAL::InstrumentMemoryLoadOp)","text":"Emits a memory load instrumentation event
Syntax:
operation ::= `hal.instrument.memory.load` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $base `[` $indices `]` `,` $loadValue\n attr-dict `:` type($base) `,` type($result)\n
Emits a workgroup-specific memory load event indicating that a number of bytes from the given resolved pointer have been loaded by the workgroup.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_54","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index loadValue
any type base
memref of any type values indices
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_31","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentmemorystore-halinstrumentmemorystoreop","title":"hal.instrument.memory.store
(HAL::InstrumentMemoryStoreOp)","text":"Emits a memory store instrumentation event
Syntax:
operation ::= `hal.instrument.memory.store` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $base `[` $indices `]` `,` $storeValue\n attr-dict `:` type($base) `,` type($result)\n
Emits a workgroup-specific memory store event indicating that a number of bytes have been stored to the given resolved pointer by the workgroup.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#operands_55","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index storeValue
any type base
memref of any type values indices
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_32","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentprint-halinstrumentprintop","title":"hal.instrument.print
(HAL::InstrumentPrintOp)","text":"Emits a human-readable printf-style string event
Syntax:
operation ::= `hal.instrument.print` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $format (`*` `(` $values^ `:` type($values) `)`)?\n attr-dict\n
Formats a string using a limited subset of printf format specifiers and the provided values and then emits an iree_instrument_dispatch_print_t
event. Final formatted string lengths may be limited to as much as 1024 characters and should be kept as small as possible to avoid easily exceeding the instrumentation storage buffers with redundant strings.
"},{"location":"reference/mlir-dialects/HAL/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription format
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_56","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index values
variadic of any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentvalue-halinstrumentvalueop","title":"hal.instrument.value
(HAL::InstrumentValueOp)","text":"Emits a scalar value instrumentation event
Syntax:
operation ::= `hal.instrument.value` `` `[` $buffer `:` type($buffer) `for` $workgroupKey `]`\n $ordinal `=` $operand attr-dict `:` type($operand)\n
Emits a workgroup-specific typed value with the given workgroup-relative ordinal.
This op will be preserved even if the output is not used as it is only for debugging purposes.
"},{"location":"reference/mlir-dialects/HAL/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription ordinal
::mlir::IntegerAttr8-bit integer attribute"},{"location":"reference/mlir-dialects/HAL/#operands_57","title":"Operands:","text":"Operand Description buffer
memref of any type values workgroupKey
index operand
any type"},{"location":"reference/mlir-dialects/HAL/#results_33","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinstrumentworkgroup-halinstrumentworkgroupop","title":"hal.instrument.workgroup
(HAL::InstrumentWorkgroupOp)","text":"Emits a dispatch workgroup instrumentation event
Syntax:
operation ::= `hal.instrument.workgroup` `` `[` $buffer `:` type($buffer) `]`\n `dispatch` `(` $dispatchId `)`\n attr-dict `:` type($workgroupKey)\n
Emits an iree_instrument_dispatch_workgroup_t
event into the instrumentation stream. The workgroup event identifies the unique dispatch, its workgroup count, and the ID of the emitting workgroup within the dispatch. Optionally targets that support querying the processor ID executing the workgroup can attach that information for tracking purposes.
On targets such as CPUs where entire workgroups execute as atomic units only one workgroup event should be emitted. On targets such as GPUs where there may be multiple invocations executing as part of a single workgroup only the first invocation within the workgroup should emit the workgroup event (by checking if the LocalInvocationIndex or threadIdx == 0, etc).
The resulting workgroup key is used by subsequent workgroup-specific instrumentation events.
"},{"location":"reference/mlir-dialects/HAL/#operands_58","title":"Operands:","text":"Operand Description buffer
memref of any type values dispatchId
32-bit signless integer"},{"location":"reference/mlir-dialects/HAL/#results_34","title":"Results:","text":"Result Description workgroupKey
index"},{"location":"reference/mlir-dialects/HAL/#interface-ops","title":"Interface ops","text":"Ops for !hal.interface.*
.
"},{"location":"reference/mlir-dialects/HAL/#halinterfacebindingsubspan-halinterfacebindingsubspanop","title":"hal.interface.binding.subspan
(HAL::InterfaceBindingSubspanOp)","text":"Returns an alias to a subspan of interface binding data
Syntax:
operation ::= `hal.interface.binding.subspan` `set` `(` $set `)`\n `binding` `(` $binding `)`\n `type` `(` custom<DescriptorType>($descriptor_type) `)`\n (`alignment` `(` $alignment^ `)`)?\n (`offset` `(` $byte_offset^ `)`)?\n (`flags` `(` $descriptor_flags^ `)`)?\n attr-dict `:` type($result) (`{` $dynamic_dims^ `}`)?\n
Returns a subspan of an interface binding storage buffer in a generic type. The exact shape, type, and alignment of the returned type are defined by the result type (tensor, memref, etc).
An optional alignment indicates the byte alignment of the base binding resource. Note that the byte offset is added to the base and the alignment will be the minimum of the two.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription set
::mlir::IntegerAttrindex attribute binding
::mlir::IntegerAttrindex attribute descriptor_type
::mlir::iree_compiler::IREE::HAL::DescriptorTypeAttrvalid DescriptorType alignment
::mlir::IntegerAttrindex attribute descriptor_flags
::mlir::iree_compiler::IREE::HAL::DescriptorFlagsAttrvalid Descriptor flags"},{"location":"reference/mlir-dialects/HAL/#operands_59","title":"Operands:","text":"Operand Description byte_offset
index dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_35","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceconstantload-halinterfaceconstantloadop","title":"hal.interface.constant.load
(HAL::InterfaceConstantLoadOp)","text":"Loads a constant value from the interface constant block
Syntax:
operation ::= `hal.interface.constant.load` `` `[` $index `]`\n (`alignment` `(` $alignment^ `)`)?\n (`values` `(` $values^ `)`)?\n attr-dict `:` type($result)\n
Loads a scalar constant value from an executable IO push constant block. The value will be loaded from the given constant offset and will be bitcast (possibly with truncation or zero-extension) to the result type.
An optional alignment indicates the byte alignment of potential values for the constant when it could be determined from analysis. If omitted the value may be anything and its interpretation is up to the usage. This is intended to provide pointer alignment-like semantics to constants that are used to index into binding resources.
An optional set of values indicates all possible values that can be passed to the constant from all dispatch sites in the program. If omitted the value may be from an unanalyzable source (outside of the program, indirect, etc) and must be assumed to have any value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrsize_t alignment
::mlir::IntegerAttrindex attribute values
::mlir::ArrayAttrarray attribute"},{"location":"reference/mlir-dialects/HAL/#results_36","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupcount-halinterfaceworkgroupcountop","title":"hal.interface.workgroup.count
(HAL::InterfaceWorkgroupCountOp)","text":"Returns the total workgroup count of the grid
Syntax:
operation ::= `hal.interface.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n
The total number of workgroups along each dimension in the dispatch grid. Matches what was passed to the hal.command_buffer.dispatch
command (or what was indirectly specified).
Corresponds to the NumWorkgroups
SPIR-V built-in and the gridDim
CUDA built-in variable.
%x = hal.interface.workgroup.count[0] : index\n%y = hal.interface.workgroup.count[1] : index\n%z = hal.interface.workgroup.count[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_37","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupid-halinterfaceworkgroupidop","title":"hal.interface.workgroup.id
(HAL::InterfaceWorkgroupIDOp)","text":"Returns the index of the current workgroup in the grid
Syntax:
operation ::= `hal.interface.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n
The global workgroup ID of the current tile in the range of [0, hal.interface.workgroup.count)
along each XYZ dimension.
Corresponds to the WorkgroupId
SPIR-V built-in and the blockIdx
CUDA built-in variable.
%x = hal.interface.workgroup.id[0] : index\n%y = hal.interface.workgroup.id[1] : index\n%z = hal.interface.workgroup.id[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_38","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#halinterfaceworkgroupsize-halinterfaceworkgroupsizeop","title":"hal.interface.workgroup.size
(HAL::InterfaceWorkgroupSizeOp)","text":"Returns the size of each workgroup in invocations
Syntax:
operation ::= `hal.interface.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n
The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.
Corresponds to the WorkgroupSize
SPIR-V built-in and the blockDim
CUDA built-in variable.
%x = hal.interface.workgroup.size[0] : index\n%y = hal.interface.workgroup.size[1] : index\n%z = hal.interface.workgroup.size[2] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#results_39","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HAL/#pipeline-layout-ops","title":"Pipeline layout ops","text":"Ops for !hal.pipeline_layout
/ iree_hal_pipeline_layout_t
.
"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutcreate-halpipelinelayoutcreateop","title":"hal.pipeline_layout.create
(HAL::PipelineLayoutCreateOp)","text":"Creates a pipeline layout
Syntax:
operation ::= `hal.pipeline_layout.create` `device` `(` $device `:` type($device) `)`\n `push_constants` `(` $push_constants `)`\n `layouts` `(` `[` $set_layouts `]` `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a pipeline layout from the given descriptor sets and push constant required size. Pipeline layouts can be shared across any executable that uses the same layout and push constant information. Sharing the layout between executables will reduce runtime binding overhead and it is often worth the cost to allow a small number of unused bindings in one executable such that it can share layouts with others that will be scheduled adjacent to it.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription push_constants
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HAL/#operands_60","title":"Operands:","text":"Operand Description device
device set_layouts
variadic of descriptor_set_layout"},{"location":"reference/mlir-dialects/HAL/#results_40","title":"Results:","text":"Result Description result
pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#halpipeline_layoutlookup-halpipelinelayoutlookupop","title":"hal.pipeline_layout.lookup
(HAL::PipelineLayoutLookupOp)","text":"Pipeline layout cache lookup pseudo-op
Syntax:
operation ::= `hal.pipeline_layout.lookup` `device` `(` $device `:` type($device) `)`\n `layout` `(` $layout `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized pipeline layout.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription layout
::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification"},{"location":"reference/mlir-dialects/HAL/#operands_61","title":"Operands:","text":"Operand Description device
device"},{"location":"reference/mlir-dialects/HAL/#results_41","title":"Results:","text":"Result Description result
pipeline_layout"},{"location":"reference/mlir-dialects/HAL/#pseudo-ops","title":"Pseudo Ops","text":"Pseudo ops for conversion support.
"},{"location":"reference/mlir-dialects/HAL/#haldispatchextern-haldispatchexternop","title":"hal.dispatch.extern
(HAL::DispatchExternOp)","text":"A dispatch of workgroups across a 3-dimensional grid
Syntax:
operation ::= `hal.dispatch.extern` $export\n (`[` $workload^ `]`)? ``\n `(` $arguments `)` `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n `count` `` custom<WorkgroupCountRegion>($workgroup_count)\n `layout` `(` $layout `)`\n (`bindings` `(` $bindings^ `)`)?\n `objects` `(` $objects `)`\n attr-dict-with-keyword\n
Dispatches some number of workgroups across a 3-dimensional grid using a function defined externally in one or more referenced objects. Objects are declared per executable target and selected automatically during linking based on where the dispatch is used. Semantically this is equivalent to a flow.dispatch.workgroups
but with the workgroup region invisible to the compiler. See hal.executable
for more information about object linkage.
Note that since this happens at tensor level the dispatch operation has value semantics: some tensors (and optionally other primitive types) are consumed and one or more new result tensors are produced. Inside each workgroup, however, the input and output tensors are available for arbitrary loads and stores. In many cases each workgroup will load some particular tile(s) from the input tensors and store some particular tile(s) to the output tensors unique to that workgroup. Though it's possible for multiple workgroups to load the same regions of the input tensors behavior is undefined if multiple workgroups store to the same regions of the output tensors. Codegen guarantees this behavior but when sourcing externally authored dispatch functions it's critical that this behavior is observed.
Though the representation is similar to the GPU-style grid dispatch model here we still have not yet allocated buffers, determined the target device for execution, or even completed fully resolving shapes/types/etc. Because of this it's important that the workgroup body use the platform-dependent primitives for accessing workgroup ID, size, and count intrinsics instead of hardcoding them to a particular set of values. Assume that any workgroup dispatch may end up being specialized for several different target devices and even several different variants for a particular target device (differing workgroup sizes, etc). To aid deduplication code producing these external dispatches should try not to specialize early for particular shapes and instead emit the most generic code possible as having 500 slightly different hal.dispatch.extern
ops pointing at the same object file is likely to require 500 copies of the object instead of 500 calls to the same object.
Because at this point in the layering devices have not yet been selected the workgroup count cannot be fully evaluated. Instead workload parameters are captured that are then passed to a function that when later evaluated computes the actual workgroup count based on target information. The workload is not limited to the 3D XYZ grid dispatch of the workgroup count and can contain any number of parameters used to compute it. If workgroup size or distribution varies based on the target device a !hal.device
argument can be used by the workgroup count calculation region to factor in device parameters. See hal.device.query
for more information on how to query information.
%r = hal.dispatch.extern \"some_function\"[%c5, %c5](%0, %1)\n : (tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32>\n ...\n
The number of results of the operation is equal to the number of results in the type signature ((tensor<5x5xf32>, tensor<5xf32>) -> tensor<5x5xf32>
). Each tensor argument and result in the type signature has a corresponding pipeline layout slot and must be declared. If multiple arguments or results share the same layout slot they can be aliased using the bindings
attribute and otherwise each is assumed unique.
There are no arguments
operands for results, but a result can be tied to an argument by writing the argument operand's SSA value instead of its type: E.g., in the above example, -> %0
would tie the first argument to the result. In that case, there would be no separate block argument for the result.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, IsolatedFromAbove
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription export
::mlir::StringAttrstring attribute layout
::mlir::iree_compiler::IREE::HAL::PipelineLayoutAttrexecutable entry point layout specification objects
::mlir::iree_compiler::IREE::HAL::ExecutableObjectsAttrtarget-specific object file references workgroup_size
::mlir::ArrayAttrindex array attribute subgroup_size
::mlir::IntegerAttrsize_t workgroup_local_memory
::mlir::IntegerAttrindex attribute bindings
::mlir::ArrayAttrHAL binding array attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/HAL/#operands_62","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/HAL/#results_42","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/HAL/#haltensorbarrier-haltensorbarrierop","title":"hal.tensor.barrier
(HAL::TensorBarrierOp)","text":"Signals a fence when all tensors are available
Syntax:
operation ::= `hal.tensor.barrier` `join` `` `(` $sources `:` type($sources) `)`\n `=` `` `>`\n $signal_fence `:` type($signal_fence)\n attr-dict-with-keyword\n
Defines a barrier that is used to indicate availability of an entire set of tensors by signaling a fence. The source tensors are returned for chaining.
Interfaces: TiedOpInterface
"},{"location":"reference/mlir-dialects/HAL/#operands_63","title":"Operands:","text":"Operand Description sources
variadic of tensor of any type values signal_fence
fence"},{"location":"reference/mlir-dialects/HAL/#results_43","title":"Results:","text":"Result Description results
variadic of tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#haltensorexport-haltensorexportop","title":"hal.tensor.export
(HAL::TensorExportOp)","text":"Exports a tensor to a HAL buffer view
Syntax:
operation ::= `hal.tensor.export` $source\n ($name^)?\n (`into` `(` $target_storage^ `:` type($target_storage) `)`)?\n `:`\n custom<TypeAlias>($source_encoding, type($source)) (`{` $source_dims^ `}`)?\n `->`\n type($target)\n attr-dict\n
Defines an export of an SSA-form tensor to an external HAL buffer view.
The provided source_encoding
, if different from the source
type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.
An optional target_storage
buffer can be provided to hold the exported result. The export will fail at runtime if the storage is null or if it has insufficient capacity to store the output. The storage must be device-visible and defined for transfer-target and dispatch usage.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_64","title":"Operands:","text":"Operand Description source
tensor of any type values source_dims
variadic of index target_storage
buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#results_44","title":"Results:","text":"Result Description target
buffer or buffer_view"},{"location":"reference/mlir-dialects/HAL/#haltensorimport-haltensorimportop","title":"hal.tensor.import
(HAL::TensorImportOp)","text":"Imports a tensor from a HAL buffer view
Syntax:
operation ::= `hal.tensor.import` (`wait` `(` $wait_fence^ `)` `=` `` `>`)?\n $source\n ($name^)?\n `:` type($source) `->`\n custom<TypeAlias>($target_encoding, type($target)) (`{` $target_dims^ `}`)?\n attr-dict\n
Defines an import of an external HAL buffer view into an SSA-form tensor. An optional semaphore timepoint can be specified indicating when the buffer view is available for use. If no semaphore timepoint is provided it is assumed the buffer view is immediately available.
The provided target_encoding
, if different from the target
type, indicates that the ABI-facing type may differ from the internal representation. The types must be bitcastable (same storage size) and dynamically shaped values must have the same number of dynamic dimensions. This allows for casting between rank-0 and rank-N types, different element types, etc.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HAL/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HAL/#operands_65","title":"Operands:","text":"Operand Description source
buffer or buffer_view target_dims
variadic of index wait_fence
fence"},{"location":"reference/mlir-dialects/HAL/#results_45","title":"Results:","text":"Result Description target
tensor of any type values"},{"location":"reference/mlir-dialects/HAL/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/HAL/#affinityqueueattr","title":"AffinityQueueAttr","text":"specifies a set of allowed queues for an operation
WIP; see #10765. This may change in the future to either be a nested attribute on a larger affinity struct or be defined by an implementation of the affinity attr interface. For now this allows higher levels of the stack to specify queues such that the stream dialect can understand them and they can be lowered into the HAL dialect.
Specifies that an annotated operation or scope is only allowed to execute on the set of queues (0-64) provided. Operations will not run on other queues.
Example:
// any queue\n#hal.affinity.queue<*>\n// queues 4 and 5\n#hal.affinity.queue<[4, 5]>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters","title":"Parameters:","text":"Parameter C++ type Description mask int64_t
"},{"location":"reference/mlir-dialects/HAL/#collectiveattr","title":"CollectiveAttr","text":"collective operation and specification
Syntax:
#hal.collective<\n CollectiveKind, # kind\n std::optional<CollectiveReductionOp>, # reduction\n CollectiveElementType # element_type\n>\n
Specifies the collective operation to perform and any mode bits required.
"},{"location":"reference/mlir-dialects/HAL/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description kind CollectiveKind
reduction std::optional<CollectiveReductionOp>
element_type CollectiveElementType
"},{"location":"reference/mlir-dialects/HAL/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"descriptor set binding specification
Syntax:
#hal.descriptor_set.binding<\n int64_t, # ordinal\n DescriptorType, # type\n std::optional<DescriptorFlags> # flags\n>\n
Specifies a single binding within a descriptor set layout.
"},{"location":"reference/mlir-dialects/HAL/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
type DescriptorType
flags std::optional<DescriptorFlags>
"},{"location":"reference/mlir-dialects/HAL/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"descriptor set layout specification
Syntax:
#hal.descriptor_set.layout<\n int64_t, # ordinal\n ::llvm::ArrayRef<DescriptorSetBindingAttr>, # bindings\n std::optional<DescriptorSetLayoutFlags> # flags\n>\n
Specifies the layout information of a single set of descriptors used within a pipeline layout. Multiple of these sets may be used by a single entry point to allow for bindings with similar update frequencies to be grouped.
"},{"location":"reference/mlir-dialects/HAL/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
bindings ::llvm::ArrayRef<DescriptorSetBindingAttr>
flags std::optional<DescriptorSetLayoutFlags>
"},{"location":"reference/mlir-dialects/HAL/#descriptortypeattr","title":"DescriptorTypeAttr","text":"valid DescriptorType
Syntax:
#hal.descriptor_type<\n ::mlir::iree_compiler::IREE::HAL::DescriptorType # value\n>\n
Enum cases: * uniform_buffer (UniformBuffer
) * storage_buffer (StorageBuffer
)
"},{"location":"reference/mlir-dialects/HAL/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::HAL::DescriptorType
an enum of type DescriptorType"},{"location":"reference/mlir-dialects/HAL/#devicematcharchitectureattr","title":"DeviceMatchArchitectureAttr","text":"matches against a device architecture pattern
Matches a device by its runtime architecture. The format of the architecture pattern is device-dependent.
"},{"location":"reference/mlir-dialects/HAL/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicematchexecutableformatattr","title":"DeviceMatchExecutableFormatAttr","text":"matches when a device supports the given executable format
Matches a device only if it claims to support the given executable format pattern. It's still possible that the executable cannot be loaded such as if it uses unavailable device features. This is used for queries such as \"can you load ELF libraries?\" to quickly get to a set of executables to attempt without needing to try dozens that definitely cannot be loaded.
Note that different devices may share the same executable formats: for example a local synchronous CPU executor and a remote asynchronous CPU executor can both load ELF libraries. It's also possible for the same device to support multiple formats such as being able to load both platform-agnostic ELF libraries and platform-specific DLL/MachO/etc libraries.
"},{"location":"reference/mlir-dialects/HAL/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicematchfeatureattr","title":"DeviceMatchFeatureAttr","text":"matches against a supported device feature pattern
Matches a device that supports the given feature. The format of the feature pattern is device-dependent.
"},{"location":"reference/mlir-dialects/HAL/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicematchidattr","title":"DeviceMatchIDAttr","text":"matches against a device ID pattern
Matches a device by its canonical compiler/runtime ID.
"},{"location":"reference/mlir-dialects/HAL/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description pattern StringAttr
"},{"location":"reference/mlir-dialects/HAL/#devicetargetattr","title":"DeviceTargetAttr","text":"generic device target specification
Specifies the properties of a target runtime device. Target devices are specified with a canonical identifier matching those used by the runtime (such as cpu
, vulkan
, etc). Target devices may support several target executable formats specified with #hal.executable.target
. An optional configuration dictionary allows for overriding backend defaults.
Example:
#hal.device.target<\"llvm-cpu\", {\n executable_targets = [\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-arm_32\">,\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-arm_64\">,\n ]\n}>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description deviceID StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/HAL/#executableobjectattr","title":"ExecutableObjectAttr","text":"object file reference
Defines an object file that can be linked into executables. Today this is only supported for external file references with paths the compiler can successfully resolve from its current working directory. Inlined data can optionally be provided to avoid the need for file system access and ensure the data source is attached to the IR as it makes its way through multiple compiler stages or reproducers.
Future revisions may change this to an interface that allows both internal and external resources to define the object contents. Linking needs to be updated to support various object compositions and certain backends may require additional infrastructure support.
In the long term the goal is to allow combinations of declared objects and generated code in order to give control of linking behavior to frontends. Instead of needing global command line flags to link in additional blobs the frontend can emit executables with the dependencies already defined per variant without needing to reach into the IREE compiler code.
Example:
#hal.executable.object<{path = \"some/file.obj\"}>\n#hal.executable.object<{\n path = \"some/embedded/file.obj\",\n data = dense<[...]> : vector<2048xi8>\n}>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_10","title":"Parameters:","text":"Parameter C++ type Description path StringAttr
data DenseIntElementsAttr
"},{"location":"reference/mlir-dialects/HAL/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"target-specific object file references
A dictionary mapping executable target specifications to a list of objects. This is used to allow layers of the stack that support multi-targeting to specify information used during lowering into each particular target.
The key attributes are matched against each target variant based on the backend and format as well as any configuration data provided. When comparing the configuration only fields present in both the key and target variant will be checked and must match. This allows specification of generic sets (\"all x86_64 targets get these objects\") as well as specific ones (\"only x86_64 targets with vector_size = 64 get these objects\").
Example:
#hal.executable.objects<{\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-arm_64\"> = [\n #hal.executable.object<{path = \"some/file_arm_64.obj\"}>\n ],\n #hal.executable.target<\"llvm-cpu\", \"embedded-elf-x86_64\"> = [\n #hal.executable.object<{path = \"some/file_x86_64.obj\"}>\n ]\n}>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_11","title":"Parameters:","text":"Parameter C++ type Description targets ArrayAttr
targetObjects ArrayAttr
"},{"location":"reference/mlir-dialects/HAL/#executabletargetattr","title":"ExecutableTargetAttr","text":"generic executable target specification
Specifies how to compile an executable for a specific target backend. A backend is used to translate and serialize the executable into the final form passed to the runtime. The format of the executable is a target-specific value indicating the required runtime support to load the deployed artifact. An optionally provided configuration dictionary overrides backend-specific defaults.
Example:
// Produce a system-native ELF for x86-64 systems using the LLVM backend:\n #hal.executable.target<\"llvm-cpu\", \"system-elf-x86_64\", {\n triple = \"x86_64-unknown-linux-elf\",\n cpu = \"host\",\n cpu_features = \"host\",\n abi = \"lp32\",\n ...\n }>\n
The same compilation backend may be used to translate executables for several different runtime devices. Likewise the same runtime device may use one of many different executable targets. Assume an N:M mapping between the two in all cases.
"},{"location":"reference/mlir-dialects/HAL/#parameters_12","title":"Parameters:","text":"Parameter C++ type Description backend StringAttr
format StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/HAL/#interfacebindingattr","title":"InterfaceBindingAttr","text":"interface binding specification
Syntax:
#hal.interface.binding<\n int64_t, # set\n int64_t # binding\n>\n
Specifies the descriptor set and binding ordinal of a particular layout binding.
Example:
#hal.interface.binding<0, 1>\n
"},{"location":"reference/mlir-dialects/HAL/#parameters_13","title":"Parameters:","text":"Parameter C++ type Description set int64_t
binding int64_t
"},{"location":"reference/mlir-dialects/HAL/#matchallattr","title":"MatchAllAttr","text":"matches if all subexpressions match
Returns true only if all subexpressions return true (logical AND) or empty.
"},{"location":"reference/mlir-dialects/HAL/#parameters_14","title":"Parameters:","text":"Parameter C++ type Description conditions ArrayAttr
"},{"location":"reference/mlir-dialects/HAL/#matchalwaysattr","title":"MatchAlwaysAttr","text":"always matches
Syntax: #hal.match.always
Returns true (constant true).
"},{"location":"reference/mlir-dialects/HAL/#matchanyattr","title":"MatchAnyAttr","text":"matches if any subexpression matches
Returns true if any subexpression matches (logical OR) and not empty.
"},{"location":"reference/mlir-dialects/HAL/#parameters_15","title":"Parameters:","text":"Parameter C++ type Description conditions ArrayAttr
"},{"location":"reference/mlir-dialects/HAL/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"executable entry point layout specification
Syntax:
#hal.pipeline.layout<\n int64_t, # pushConstants\n ::llvm::ArrayRef<DescriptorSetLayoutAttr> # setLayouts\n>\n
Specifies the layout information used for interacting with executable functions. This allows host code to correctly map parameters to the lower-level target-specific argument passing behavior.
"},{"location":"reference/mlir-dialects/HAL/#parameters_16","title":"Parameters:","text":"Parameter C++ type Description pushConstants int64_t
setLayouts ::llvm::ArrayRef<DescriptorSetLayoutAttr>
"},{"location":"reference/mlir-dialects/HAL/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/HAL/#allocator","title":"allocator","text":"Allocates buffers for a particular device memory space.
"},{"location":"reference/mlir-dialects/HAL/#buffer","title":"buffer","text":"A memory buffer with a specific memory_type that is used to describe the capabilities and behavior of the backing memory of the buffer. Buffers may be any mix of host-accessible, host-coherent, or device-accessible for various usages. Depending on these memory types the buffers may be mapped for access on the host as memory though certain restrictions may be imposed.
"},{"location":"reference/mlir-dialects/HAL/#buffer_view","title":"buffer_view","text":"A shaped and typed buffer reference. This just wraps an existing hal.buffer with its associated metadata to make it easier to pass across ABI boundaries. In most cases buffer views can be elided entirely by the compiler and they'll only be seen when calling external functions.
"},{"location":"reference/mlir-dialects/HAL/#collectivechannel","title":"collective.channel","text":"Channel identifier used to allow for participation in multiple collective groups.
"},{"location":"reference/mlir-dialects/HAL/#command_buffer","title":"command_buffer","text":"Asynchronous command buffer recording interface. Commands are recorded by the implementation for later submission to command queues.
"},{"location":"reference/mlir-dialects/HAL/#descriptor_set_layout","title":"descriptor_set_layout","text":"Descriptor set layout.
"},{"location":"reference/mlir-dialects/HAL/#device","title":"device","text":"Logical device instance.
"},{"location":"reference/mlir-dialects/HAL/#event","title":"event","text":"Events are used for defining synchronization scopes within CommandBuffers. An event only exists within a single CommandBuffer and must not be used across CommandBuffers from the same device or others.
"},{"location":"reference/mlir-dialects/HAL/#executable","title":"executable","text":"A prepared and ready-to-dispatch executable.
"},{"location":"reference/mlir-dialects/HAL/#fence","title":"fence","text":"A set of semaphore timepoints defining a common point in time across multiple timelines.
"},{"location":"reference/mlir-dialects/HAL/#buffer_1","title":"buffer","text":"A stateless file handle that can be read/written using queue-ordered transfer operations.
"},{"location":"reference/mlir-dialects/HAL/#pipeline_layout","title":"pipeline_layout","text":"A pipeline layout describing the descriptor sets and push constants used.
"},{"location":"reference/mlir-dialects/HALInline/","title":"HAL/Inline","text":""},{"location":"reference/mlir-dialects/HALInline/#hal_inline-dialect","title":"'hal_inline' Dialect","text":"IREE inline HAL interop runtime module dialect.
Low-level dialect for limited in-process ABI interop with the full HAL. Only operates synchronously, single-threaded, and on host-local buffers. Use the full HAL for all other cases.
This dialect can be used alongside the full HAL but is intended for use in standalone configurations or paired with the hal_loader
dialect which also carries the same usage restrictions.
See hal_inline.imports.mlir
for the full list of exported functions.
- 'hal_inline' Dialect
- Operation definition
- Buffer ops
- hal_inline.buffer.allocate.initialized (HAL::Inline::BufferAllocateInitializedOp)
- hal_inline.buffer.allocate (HAL::Inline::BufferAllocateOp)
- hal_inline.buffer.length (HAL::Inline::BufferLengthOp)
- hal_inline.buffer.storage (HAL::Inline::BufferStorageOp)
- hal_inline.buffer.subspan (HAL::Inline::BufferSubspanOp)
- hal_inline.buffer.wrap (HAL::Inline::BufferWrapOp)
- Buffer view ops
- hal_inline.buffer_view.assert (HAL::Inline::BufferViewAssertOp)
- hal_inline.buffer_view.buffer (HAL::Inline::BufferViewBufferOp)
- hal_inline.buffer_view.create (HAL::Inline::BufferViewCreateOp)
- hal_inline.buffer_view.dim (HAL::Inline::BufferViewDimOp)
- hal_inline.buffer_view.element_type (HAL::Inline::BufferViewElementTypeOp)
- hal_inline.buffer_view.encoding_type (HAL::Inline::BufferViewEncodingTypeOp)
- hal_inline.buffer_view.rank (HAL::Inline::BufferViewRankOp)
- hal_inline.buffer_view.trace (HAL::Inline::BufferViewTraceOp)
- Device ops
- hal_inline.device.query (HAL::Inline::DeviceQueryOp)
"},{"location":"reference/mlir-dialects/HALInline/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/HALInline/#buffer-ops","title":"Buffer ops","text":"Ops for !hal.buffer
/ iree_hal_buffer_t
.
"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocateinitialized-halinlinebufferallocateinitializedop","title":"hal_inline.buffer.allocate.initialized
(HAL::Inline::BufferAllocateInitializedOp)","text":"Buffer allocation with cloning
Syntax:
operation ::= `hal_inline.buffer.allocate.initialized` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n `alignment` `(` $minimum_alignment `)`\n `:` custom<SizeAwareType>(type($result), ref($length)) `in` type($storage)\n attr-dict-with-keyword\n
Allocates a buffer with a copy of the provided contents.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HALInline/#operands","title":"Operands:","text":"Operand Description minimum_alignment
index source
a reference counted byte buffer offset
index length
index"},{"location":"reference/mlir-dialects/HALInline/#results","title":"Results:","text":"Result Description result
buffer storage
a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferallocate-halinlinebufferallocateop","title":"hal_inline.buffer.allocate
(HAL::Inline::BufferAllocateOp)","text":"Empty buffer allocation operation
Syntax:
operation ::= `hal_inline.buffer.allocate` `alignment` `(` $minimum_alignment `)`\n `:` custom<SizeAwareType>(type($result), $allocation_size) `in` type($storage)\n attr-dict-with-keyword\n
Allocates a buffer of the given size. The size of the buffer returned may be larger than the requested size if the allocator has specific alignment requirements or minimum allocation sizes.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HALInline/#operands_1","title":"Operands:","text":"Operand Description minimum_alignment
index allocation_size
index"},{"location":"reference/mlir-dialects/HALInline/#results_1","title":"Results:","text":"Result Description result
buffer storage
a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferlength-halinlinebufferlengthop","title":"hal_inline.buffer.length
(HAL::Inline::BufferLengthOp)","text":"Buffer byte length accessor
Syntax:
operation ::= `hal_inline.buffer.length` `<` $buffer `:` type($buffer) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the allocated size of a buffer in bytes. May be less than the underlying buffer allocation if this is a subspan or view into another buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_2","title":"Operands:","text":"Operand Description buffer
buffer"},{"location":"reference/mlir-dialects/HALInline/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferstorage-halinlinebufferstorageop","title":"hal_inline.buffer.storage
(HAL::Inline::BufferStorageOp)","text":"Buffer backing storage accessor
Syntax:
operation ::= `hal_inline.buffer.storage` `<` $buffer `:` type($buffer) `>`\n `:` type($storage)\n attr-dict-with-keyword\n
Returns the host backing storage of the HAL buffer as a subspan limited to the buffer's logical range (meaning that byte 0 of the returned buffer is byte 0 of the HAL buffer).
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_3","title":"Operands:","text":"Operand Description buffer
buffer"},{"location":"reference/mlir-dialects/HALInline/#results_3","title":"Results:","text":"Result Description storage
a reference counted byte buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffersubspan-halinlinebuffersubspanop","title":"hal_inline.buffer.subspan
(HAL::Inline::BufferSubspanOp)","text":"Buffer subspan operation
Syntax:
operation ::= `hal_inline.buffer.subspan` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a reference to a subspan of the buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SizeAwareOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_4","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index length
index"},{"location":"reference/mlir-dialects/HALInline/#results_4","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebufferwrap-halinlinebufferwrapop","title":"hal_inline.buffer.wrap
(HAL::Inline::BufferWrapOp)","text":"Host buffer wrapping operation
Syntax:
operation ::= `hal_inline.buffer.wrap` `source` `(` $source `:` type($source) `)` `` `[` $offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Tries wrapping a !hal.buffer around host memory backed by the given byte buffer.
Interfaces: OpAsmOpInterface, SizeAwareOpInterface
"},{"location":"reference/mlir-dialects/HALInline/#operands_5","title":"Operands:","text":"Operand Description source
a reference counted byte buffer offset
index length
index"},{"location":"reference/mlir-dialects/HALInline/#results_5","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HALInline/#buffer-view-ops","title":"Buffer view ops","text":"Ops for !hal.buffer_view
/ iree_hal_buffer_view_t
.
"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewassert-halinlinebufferviewassertop","title":"hal_inline.buffer_view.assert
(HAL::Inline::BufferViewAssertOp)","text":"Buffer view contents assertion
Syntax:
operation ::= `hal_inline.buffer_view.assert` `<` $buffer_view `:` type($buffer_view) `>`\n `message` `(` $message `)`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n attr-dict-with-keyword\n
Asserts that the buffer view contains a data compatible tensor with the given encoding. Program execution will abort as if std.assert
had been used.
"},{"location":"reference/mlir-dialects/HALInline/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_6","title":"Operands:","text":"Operand Description buffer_view
buffer_view element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewbuffer-halinlinebufferviewbufferop","title":"hal_inline.buffer_view.buffer
(HAL::Inline::BufferViewBufferOp)","text":"Buffer view buffer accessor
Syntax:
operation ::= `hal_inline.buffer_view.buffer` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the buffer backing this view's contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_7","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_6","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewcreate-halinlinebufferviewcreateop","title":"hal_inline.buffer_view.create
(HAL::Inline::BufferViewCreateOp)","text":"Buffer view reference initializer
Syntax:
operation ::= `hal_inline.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `,` $source_length `]`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_8","title":"Operands:","text":"Operand Description source_buffer
buffer source_offset
index source_length
index element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/HALInline/#results_7","title":"Results:","text":"Result Description result
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewdim-halinlinebufferviewdimop","title":"hal_inline.buffer_view.dim
(HAL::Inline::BufferViewDimOp)","text":"Buffer view dimension value query
Syntax:
operation ::= `hal_inline.buffer_view.dim` `<` $buffer_view `:` type($buffer_view) `>`\n `` `[` $index `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the value of the given dimension.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_9","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_8","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewelement_type-halinlinebufferviewelementtypeop","title":"hal_inline.buffer_view.element_type
(HAL::Inline::BufferViewElementTypeOp)","text":"Buffer view element type query
Syntax:
operation ::= `hal_inline.buffer_view.element_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the element type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_10","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_9","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewencoding_type-halinlinebufferviewencodingtypeop","title":"hal_inline.buffer_view.encoding_type
(HAL::Inline::BufferViewEncodingTypeOp)","text":"Buffer view encoding type query
Syntax:
operation ::= `hal_inline.buffer_view.encoding_type` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the encoding type of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_11","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_10","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewrank-halinlinebufferviewrankop","title":"hal_inline.buffer_view.rank
(HAL::Inline::BufferViewRankOp)","text":"Buffer view rank query
Syntax:
operation ::= `hal_inline.buffer_view.rank` `<` $buffer_view `:` type($buffer_view) `>`\n `:` type($result)\n attr-dict-with-keyword\n
Returns the rank of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#operands_12","title":"Operands:","text":"Operand Description buffer_view
buffer_view"},{"location":"reference/mlir-dialects/HALInline/#results_11","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinebuffer_viewtrace-halinlinebufferviewtraceop","title":"hal_inline.buffer_view.trace
(HAL::Inline::BufferViewTraceOp)","text":"Trace value(s) operation
Syntax:
operation ::= `hal_inline.buffer_view.trace` $key `=`\n $operands `:` type($operands)\n attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given buffer views and titles them with the given key. The key is informational only and useful for titling/marking specific sets of buffers for easier searching.
"},{"location":"reference/mlir-dialects/HALInline/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALInline/#operands_13","title":"Operands:","text":"Operand Description operands
variadic of buffer_view"},{"location":"reference/mlir-dialects/HALInline/#device-ops","title":"Device ops","text":"Ops for !hal.device
/ iree_hal_device_t
.
"},{"location":"reference/mlir-dialects/HALInline/#hal_inlinedevicequery-halinlinedevicequeryop","title":"hal_inline.device.query
(HAL::Inline::DeviceQueryOp)","text":"Returns a runtime configuration parameter from the device
Syntax:
operation ::= `hal_inline.device.query` `key` `(` $category `:` `` `:` $key `)`\n `:` type($ok) `,` type($value)\n (`=` $default_value^)?\n attr-dict-with-keyword\n
Queries a device configuration parameter with the given key. Returns a status indicating whether the pair was recognized/available and if it was the value converted to the specified type. Queries must return the same value for the lifetime of the module though may vary from run to run.
This is roughly equivalent to the sysconf
linux syscall (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact set of keys available and their interpretation is target-dependent. If there is a HAL match attribute (#hal.device.match.*
) or op (hal.device.match.*
) prefer to use that in order to get compile-time propagation when the target is specified and elide the runtime query and get compile-time verification when a runtime query is required.
Users of the op must check the ok
result before using the value as what set of keys is available may change over time. If in doubt: don't use this. Each key used adds additional versioning and testing complexity as runtime code path changes will explode combinatorially and should be treated with as much care as a binary file format change. Keys should be prefixed with ex.
when experimental indicating that they are not expected to be present forever; all non-experimental keys should be vetted.
Well-known keys: (none yet)
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALInline/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription category
::mlir::StringAttrstring attribute key
::mlir::StringAttrstring attribute default_value
::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/HALInline/#results_12","title":"Results:","text":"Result Description ok
1-bit signless integer value
any type"},{"location":"reference/mlir-dialects/HALLoader/","title":"HAL/Loader","text":""},{"location":"reference/mlir-dialects/HALLoader/#hal_loader-dialect","title":"'hal_loader' Dialect","text":"IREE HAL inline executable loader runtime module dialect.
Low-level dialect for dynamically loading executables and dispatching work. Only operates synchronously, single-threaded, and on host-local buffers. Use the full HAL for all other cases.
This dialect can be used alongside the full HAL but is intended for use in conjunction with the hal_inline
dialect which also carries the same usage restrictions.
See hal_loader.imports.mlir
for the full list of exported functions.
- 'hal_loader' Dialect
- Operation definition
- Executable ops
- hal_loader.executable.dispatch (HAL::Loader::ExecutableDispatchOp)
- hal_loader.executable.dispatch.symbol (HAL::Loader::ExecutableDispatchSymbolOp)
- hal_loader.executable.load (HAL::Loader::ExecutableLoadOp)
- hal_loader.executable.lookup (HAL::Loader::ExecutableLookupOp)
- hal_loader.executable.query_support (HAL::Loader::ExecutableQuerySupportOp)
"},{"location":"reference/mlir-dialects/HALLoader/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/HALLoader/#executable-ops","title":"Executable ops","text":"Ops for !hal.executable
/ iree_hal_executable_t
.
"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatch-halloaderexecutabledispatchop","title":"hal_loader.executable.dispatch
(HAL::Loader::ExecutableDispatchOp)","text":"Inline executable dispatch operation
Syntax:
operation ::= `hal_loader.executable.dispatch` `executable` `(` $executable `:` type($executable) `)`\n `` `[` $entry_point `]`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n (`constants` `(` `[` $push_constants^ `]` `)`)?\n `bindings` `(` `[`\n custom<DispatchBindings>($binding_buffers,\n type($binding_buffers),\n $binding_offsets,\n $binding_lengths)\n `]` `)`\n attr-dict-with-keyword\n
Dispatches execution to an executable entry point with the given parameters.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/HALLoader/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::IntegerAttrsize_t"},{"location":"reference/mlir-dialects/HALLoader/#operands","title":"Operands:","text":"Operand Description executable
executable workgroup_x
index workgroup_y
index workgroup_z
index push_constants
variadic of 32-bit signless integer binding_buffers
variadic of a reference counted byte buffer binding_offsets
variadic of index binding_lengths
variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutabledispatchsymbol-halloaderexecutabledispatchsymbolop","title":"hal_loader.executable.dispatch.symbol
(HAL::Loader::ExecutableDispatchSymbolOp)","text":"Inline executable dispatch operation
Syntax:
operation ::= `hal_loader.executable.dispatch.symbol` `executable` `(` $executable `:` type($executable) `)`\n `target` `(` $entry_point `)`\n `workgroups` `(` `[`\n $workgroup_x `,`\n $workgroup_y `,`\n $workgroup_z\n `]` `)`\n (`constants` `(` `[` $push_constants^ `]` `)`)?\n `bindings` `(` `[`\n custom<DispatchBindings>($binding_buffers,\n type($binding_buffers),\n $binding_offsets,\n $binding_lengths)\n `]` `)`\n attr-dict-with-keyword\n
Dispatches execution to an executable entry point with the given parameters. The entry point is a symbolic reference to an exported entry point.
Traits: AttrSizedOperandSegments
Interfaces: SymbolUserOpInterface
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_1","title":"Operands:","text":"Operand Description executable
executable workgroup_x
index workgroup_y
index workgroup_z
index push_constants
variadic of 32-bit signless integer binding_buffers
variadic of a reference counted byte buffer binding_offsets
variadic of index binding_lengths
variadic of index"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutableload-halloaderexecutableloadop","title":"hal_loader.executable.load
(HAL::Loader::ExecutableLoadOp)","text":"Dynamically loads an executable
Syntax:
operation ::= `hal_loader.executable.load` `format` `(` $format `)`\n `data` `(` $data `)`\n (`constants` `(` `[` $constants^ `]` `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Creates, loads, and dynamically links an executable.
Optional constants provide for specialization of the executable based on runtime-derived parameters.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription format
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALLoader/#operands_2","title":"Operands:","text":"Operand Description data
a reference counted byte buffer constants
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/HALLoader/#results","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablelookup-halloaderexecutablelookupop","title":"hal_loader.executable.lookup
(HAL::Loader::ExecutableLookupOp)","text":"Executable cache lookup pseudo-op
Syntax:
operation ::= `hal_loader.executable.lookup` `executable` `(` $executable `)`\n `:` type($result)\n attr-dict-with-keyword\n
Used during conversion to provide a placeholder for a globally cached and possibly lazy-initialized executable.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SymbolUserOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription executable
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_1","title":"Results:","text":"Result Description result
executable"},{"location":"reference/mlir-dialects/HALLoader/#hal_loaderexecutablequery_support-halloaderexecutablequerysupportop","title":"hal_loader.executable.query_support
(HAL::Loader::ExecutableQuerySupportOp)","text":"Queries whether an executable format is supported
Syntax:
operation ::= `hal_loader.executable.query_support` `format` `(` $executable_format `)`\n `:` type($supported)\n attr-dict-with-keyword\n
Returns true if the given format is supported by the device loader. This does not guarantee that loading will succeed as the executable may require functionality that cannot be met by the hosting runtime environment.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/HALLoader/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription executable_format
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/HALLoader/#results_2","title":"Results:","text":"Result Description supported
1-bit signless integer"},{"location":"reference/mlir-dialects/IOParameters/","title":"IO/Parameters","text":""},{"location":"reference/mlir-dialects/IOParameters/#io_parameters-dialect","title":"'io_parameters' Dialect","text":"External parameter resource management APIs.
Parameters are externalized storage for resources that are asynchronously accessible and device-aware. Parameters can be read or written on the same device timelines as the operations that consume or produce them and with locality pinning to ensure memory doesn't need to move. Parameters are referenced by a scope and a key, with the scope being optional but strongly recommended as a way to distinguish sets of parameters that may exist when multiple model parts are compiled together and would otherwise collide.
Parameters are provided by a few operations implementing a virtual interface and can support shared parameters (same storage used in multiple contexts, or outliving a single instantiation in a context), in-memory caches, memory-mapped files (including directly using the mapped memory for execution when devices support it), iree_hal_file_t
usage for device-supported I/O, and parameter subsetting for things like runtime sharding.
Alongside read(+load) and write operations gather and scatter allow for batching of large numbers of reads and writes into/from single buffers. For parameter providers that can batch operations this allows for a handful (~1-4) of calls out to perform many more operations (~thousands). Modeling the gather/scatter also gives us a point where we could extract the mapping and use it to repack files/defrag memory in the future.
See io_parameters.imports.mlir
for the full list of exported functions.
- 'io_parameters' Dialect
- Operation definition
- Parameter I/O ops
- io_parameters.gather (IO::Parameters::GatherOp)
- io_parameters.load (IO::Parameters::LoadOp)
- io_parameters.scatter (IO::Parameters::ScatterOp)
"},{"location":"reference/mlir-dialects/IOParameters/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IOParameters/#parameter-io-ops","title":"Parameter I/O ops","text":"Ops for parameter I/O.
"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersgather-ioparametersgatherop","title":"io_parameters.gather
(IO::Parameters::GatherOp)","text":"Gathers multiple parameters from a parameter scope
Syntax:
operation ::= `io_parameters.gather` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `{`\n custom<ParameterGatherOperations>(\n $source_scope, $source_keys, $source_offsets,\n $target_buffer, type($target_buffer), $target_offsets, $target_lengths)\n `}`\n attr-dict-with-keyword\n
Asynchronously gathers one or more parameters into a single target buffer. This is equivalent to one read per parameter but allows implementations that can batch operations to do so without additional overhead.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/IOParameters/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_keys
::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_offsets
variadic of 64-bit signless integer target_buffer
buffer target_offsets
variadic of index target_lengths
variadic of index"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersload-ioparametersloadop","title":"io_parameters.load
(IO::Parameters::LoadOp)","text":"Reads a parameter from a parameter scope
Syntax:
operation ::= `io_parameters.load` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `source` `(` custom<ParameterReference>($source_scope, $source_key) `)`\n `` `[` $source_offset `]`\n `type` `(` $memory_types `)`\n `usage` `(` $buffer_usage `)`\n `:` custom<SizeAwareType>(type($result), $length)\n attr-dict-with-keyword\n
Asynchronously reads a parameter from an external parameter provider and returns the resulting buffer. Depending on the parameter and buffer types this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if an allocate + read had been used.
Interfaces: Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/IOParameters/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_key
::mlir::StringAttrstring attribute memory_types
mlir::iree_compiler::IREE::HAL::MemoryTypeBitfieldAttrvalid MemoryType buffer_usage
mlir::iree_compiler::IREE::HAL::BufferUsageBitfieldAttrvalid BufferUsage"},{"location":"reference/mlir-dialects/IOParameters/#operands_1","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_offset
64-bit signless integer length
index"},{"location":"reference/mlir-dialects/IOParameters/#results","title":"Results:","text":"Result Description result
buffer"},{"location":"reference/mlir-dialects/IOParameters/#io_parametersscatter-ioparametersscatterop","title":"io_parameters.scatter
(IO::Parameters::ScatterOp)","text":"Scatters multiple parameters to a parameter scope
Syntax:
operation ::= `io_parameters.scatter` `<` $device `:` type($device) `>`\n `affinity` `(` $queue_affinity `)`\n `wait` `(` $wait_fence `)`\n `signal` `(` $signal_fence `)`\n `{`\n custom<ParameterScatterOperations>(\n $source_buffer, type($source_buffer), $source_offsets, $source_lengths,\n $target_scope, $target_keys, $target_offsets)\n `}`\n attr-dict-with-keyword\n
Asynchronously scatters one or more parameters from a single source buffer into one or more parameters. This is equivalent to one write per parameter but allows implementations that can batch operations to do so without additional overhead.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/IOParameters/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription target_scope
::mlir::StringAttrstring attribute target_keys
::mlir::ArrayAttrstring array attribute"},{"location":"reference/mlir-dialects/IOParameters/#operands_2","title":"Operands:","text":"Operand Description device
device queue_affinity
64-bit signless integer wait_fence
fence signal_fence
fence source_buffer
buffer source_offsets
variadic of index source_lengths
variadic of index target_offsets
variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/IREEInput/","title":"IREEInput","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_input-dialect","title":"'iree_input' Dialect","text":"Public ops/type/attributes legal for input to IREE's compiler.
IREE's compiler allows as input a number of common dialects. This dialect contains structural and unique ops that do not exist elsewhere or that IREE has an interest in maintaining as a stable set.
The contents of this dialect often mirror various constructs in IREE's internal implementation. The focus here is on simplicity and stability over time. Generally, this dialect does not use \"advanced\" features and should be broadly source compatible over a range of LLVM versions. There are of course, limits, and source-compatibility is not guaranteed, since LLVM/MLIR's API surface is itself unstable.
- 'iree_input' Dialect
- Operation definition
- Buffer and buffer view ops
- iree_input.buffer.subspan (Input::BufferSubspanOp)
- iree_input.buffer_view.create (Input::BufferViewCreateOp)
- iree_input.buffer_view.dim (Input::BufferViewDimOp)
- iree_input.buffer_view.rank (Input::BufferViewRankOp)
- Byte buffer ops
- iree_input.byte_buffer.constant (Input::ByteBufferConstantOp)
- Compiler hint ops
- iree_input.optimization_barrier (Input::OptimizationBarrierOp)
- Dispatch ops
- iree_input.dispatch (Input::DispatchOp)
- Executable source ops
- iree_input.executable.export (Input::ExecutableExportOp)
- iree_input.executable.source_end (Input::ExecutableSourceEndOp)
- iree_input.executable.source (Input::ExecutableSourceOp)
- Global variable ops
- iree_input.global.address (Input::GlobalAddressOp)
- iree_input.global.load.indirect (Input::GlobalLoadIndirectOp)
- iree_input.global.load (Input::GlobalLoadOp)
- iree_input.global (Input::GlobalOp)
- iree_input.global.store.indirect (Input::GlobalStoreIndirectOp)
- iree_input.global.store (Input::GlobalStoreOp)
- Mutable list ops
- iree_input.list.create (Input::ListCreateOp)
- iree_input.list.get (Input::ListGetOp)
- iree_input.list.resize (Input::ListResizeOp)
- iree_input.list.set (Input::ListSetOp)
- iree_input.list.size (Input::ListSizeOp)
- Pseudo ops for conversion support
- iree_input.tensor.export (Input::TensorExportOp)
- iree_input.tensor.import (Input::TensorImportOp)
- Tensor ops
- iree_input.tensor.bitcast (Input::TensorBitCastOp)
- iree_input.tensor.clone (Input::TensorCloneOp)
- iree_input.tensor.load (Input::TensorLoadOp)
- iree_input.tensor.reshape (Input::TensorReshapeOp)
- iree_input.tensor.slice (Input::TensorSliceOp)
- iree_input.tensor.splat (Input::TensorSplatOp)
- iree_input.tensor.store (Input::TensorStoreOp)
- iree_input.tensor.trace (Input::TensorTraceOp)
- iree_input.tensor.update (Input::TensorUpdateOp)
- Utility ops
- iree_input.align (Input::AlignOp)
- iree_input.null (Input::NullOp)
- Workgroup dispatch ops
- iree_input.dispatch.workgroup.count (Input::DispatchWorkgroupCountOp)
- iree_input.dispatch.workgroup.id (Input::DispatchWorkgroupIDOp)
- iree_input.dispatch.workgroup.size (Input::DispatchWorkgroupSizeOp)
- Attribute definition
- DescriptorSetBindingAttr
- DescriptorSetLayoutAttr
- DescriptorTypeAttr
- DeviceTargetAttr
- ExecutableObjectAttr
- ExecutableObjectsAttr
- ExecutableTargetAttr
- PipelineLayoutAttr
- Type constraint definition
- list
- Type definition
- BufferType
- BufferViewType
- ByteBufferType
- ListType
- PtrType
- VariantType
"},{"location":"reference/mlir-dialects/IREEInput/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffer-and-buffer-view-ops","title":"Buffer and buffer view ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffersubspan-inputbuffersubspanop","title":"iree_input.buffer.subspan
(Input::BufferSubspanOp)","text":"Buffer subspan operation
Syntax:
operation ::= `iree_input.buffer.subspan` `<` $source_buffer `:` type($source_buffer) `>`\n `` `[` $source_offset `,` $length `]`\n `:` type($result)\n attr-dict-with-keyword\n
Returns a reference to a subspan of the buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands","title":"Operands:","text":"Operand Description source_buffer
Buffer is an untyped bag of bits with no shape or dtype source_offset
index length
index"},{"location":"reference/mlir-dialects/IREEInput/#results","title":"Results:","text":"Result Description result
Buffer is an untyped bag of bits with no shape or dtype"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewcreate-inputbufferviewcreateop","title":"iree_input.buffer_view.create
(Input::BufferViewCreateOp)","text":"Buffer view reference initializer
Syntax:
operation ::= `iree_input.buffer_view.create` `buffer` `(` $source_buffer `:` type($source_buffer) `)`\n `` `[` $source_offset `,` $source_length `]`\n `shape` `(` `[` $shape `]` `)`\n `type` `(` $element_type `)`\n `encoding` `(` $encoding_type `)`\n `:` type($result)\n attr-dict-with-keyword\n
Creates a reference to a buffer with a particular shape and element type. The buffer is not copied and both the original and view references must be synchronized. This makes it easier to associate commonly-carried metadata along with the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_1","title":"Operands:","text":"Operand Description source_buffer
Buffer is an untyped bag of bits with no shape or dtype source_offset
index source_length
index element_type
32-bit signless integer encoding_type
32-bit signless integer shape
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_1","title":"Results:","text":"Result Description result
View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewdim-inputbufferviewdimop","title":"iree_input.buffer_view.dim
(Input::BufferViewDimOp)","text":"Buffer view dimension value query
Syntax:
operation ::= `iree_input.buffer_view.dim` $buffer_view `,` $index attr-dict `:` type($result)\n
Returns the value of the given dimension.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription index
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_2","title":"Operands:","text":"Operand Description buffer_view
View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#results_2","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbuffer_viewrank-inputbufferviewrankop","title":"iree_input.buffer_view.rank
(Input::BufferViewRankOp)","text":"Buffer view rank query
Syntax:
operation ::= `iree_input.buffer_view.rank` $buffer_view attr-dict `:` type($result)\n
Returns the rank of the buffer view.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_3","title":"Operands:","text":"Operand Description buffer_view
View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#results_3","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#byte-buffer-ops","title":"Byte buffer ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputbyte_bufferconstant-inputbytebufferconstantop","title":"iree_input.byte_buffer.constant
(Input::ByteBufferConstantOp)","text":"Constant host-side byte buffer
Syntax:
operation ::= `iree_input.byte_buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n
Defines a compile-time byte buffer based on the given attribute value. The attribute will be serialized into the canonical IREE format for the chosen host target.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription name
::mlir::StringAttrstring attribute value
::mlir::StringAttrstring attribute alignment
::mlir::IntegerAttrindex attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_4","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/IREEInput/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputoptimization_barrier-inputoptimizationbarrierop","title":"iree_input.optimization_barrier
(Input::OptimizationBarrierOp)","text":"Prevents compiler optimizations across a value.
Syntax:
operation ::= `iree_input.optimization_barrier` attr-dict\n ($operands^ `:` type($operands))?\n
Wraps any operands in an unoptimizable identity to prevent its results from being folded. It will be dropped during the final step in compilation and has no effect at runtime.
Traits: SameOperandsAndResultType
"},{"location":"reference/mlir-dialects/IREEInput/#operands_4","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#results_5","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#dispatch-ops","title":"Dispatch ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatch-inputdispatchop","title":"iree_input.dispatch
(Input::DispatchOp)","text":"A dispatch of an executable across a grid
Syntax:
operation ::= `iree_input.dispatch` $entry_point\n (`[` $workload^ `]`)? ``\n `(` $arguments `)` attr-dict `:`\n custom<ShapedFunctionType>(ref($arguments),\n type($arguments), $argument_dims,\n type($results), $result_dims,\n $tied_operands)\n
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), SymbolUserOpInterface, TiedOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_point
::mlir::SymbolRefAttrsymbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_5","title":"Operands:","text":"Operand Description workload
variadic of index arguments
variadic of any type argument_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_6","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/IREEInput/#executable-source-ops","title":"Executable source ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutableexport-inputexecutableexportop","title":"iree_input.executable.export
(Input::ExecutableExportOp)","text":"Executable entry point declaration
Syntax:
operation ::= `iree_input.executable.export` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n `ordinal` `(` $ordinal `)`\n `layout` `(` $layout `)`\n attr-dict-with-keyword\n
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrsize_t layout
::mlir::iree_compiler::IREE::Input::PipelineLayoutAttrexecutable entry point layout specification workgroup_size
::mlir::ArrayAttrindex array attribute subgroup_size
::mlir::IntegerAttrsize_t workgroup_local_memory
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource_end-inputexecutablesourceendop","title":"iree_input.executable.source_end
(Input::ExecutableSourceEndOp)","text":"Terminator pseudo-op for the executable source op
Syntax:
operation ::= `iree_input.executable.source_end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputexecutablesource-inputexecutablesourceop","title":"iree_input.executable.source
(Input::ExecutableSourceOp)","text":"
Generic source contents of an executable op
Syntax:
operation ::= `iree_input.executable.source` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n ``\n regions\n
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute objects
::mlir::iree_compiler::IREE::Input::ExecutableObjectsAttrtarget-specific object file references"},{"location":"reference/mlir-dialects/IREEInput/#global-variable-ops","title":"Global variable ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobaladdress-inputglobaladdressop","title":"iree_input.global.address
(Input::GlobalAddressOp)","text":"Returns an address reference to a global
Syntax:
operation ::= `iree_input.global.address` $global attr-dict `:` type($result)\n
Returns the address of a global as a typed reference. Can be used with the global load and store indirect ops.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_7","title":"Results:","text":"Result Description result
ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputgloballoadindirect-inputgloballoadindirectop","title":"iree_input.global.load.indirect
(Input::GlobalLoadIndirectOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `iree_input.global.load.indirect` $global attr-dict `:` type($global) `->` type($result)\n
Returns a copy of the global value.
"},{"location":"reference/mlir-dialects/IREEInput/#operands_6","title":"Operands:","text":"Operand Description global
ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#results_8","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalload-inputgloballoadop","title":"iree_input.global.load
(Input::GlobalLoadOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `iree_input.global.load` $global attr-dict `:` type($result)\n
Returns a copy of the global value.
Interfaces: SymbolUserOpInterface
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_9","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobal-inputglobalop","title":"iree_input.global
(Input::GlobalOp)","text":"Stateful global variable declaration
Syntax:
operation ::= `iree_input.global` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n (`initializer` `(` $initializer^ `)`)?\n custom<TypeOrAttr>($type, $initial_value)\n
Declares a global variable that maintains its value across invocations. The value is tied to the execution context of the module and different contexts will have different global storage.
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initializer
::mlir::FlatSymbolRefAttrflat symbol reference attribute initial_value
::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstoreindirect-inputglobalstoreindirectop","title":"iree_input.global.store.indirect
(Input::GlobalStoreIndirectOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `iree_input.global.store.indirect` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a copy of the value into a global.
"},{"location":"reference/mlir-dialects/IREEInput/#operands_7","title":"Operands:","text":"Operand Description value
any type global
ranked tensor of any type values or index or signless integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputglobalstore-inputglobalstoreop","title":"iree_input.global.store
(Input::GlobalStoreOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `iree_input.global.store` $value `,` $global attr-dict `:` type($value)\n
Stores a copy of the value into a global.
Interfaces: SymbolUserOpInterface
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_8","title":"Operands:","text":"Operand Description value
any type"},{"location":"reference/mlir-dialects/IREEInput/#mutable-list-ops","title":"Mutable list ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistcreate-inputlistcreateop","title":"iree_input.list.create
(Input::ListCreateOp)","text":"Creates a new empty list
Syntax:
operation ::= `iree_input.list.create` ($initial_capacity^)? attr-dict `:` type($result)\n
Creates a new empty list with an optional initial capacity.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_9","title":"Operands:","text":"Operand Description initial_capacity
index"},{"location":"reference/mlir-dialects/IREEInput/#results_10","title":"Results:","text":"Result Description result
list"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistget-inputlistgetop","title":"iree_input.list.get
(Input::ListGetOp)","text":"Element accessor
Syntax:
operation ::= `iree_input.list.get` $list `[` $index `]` attr-dict `:` type($list) `->` type($result)\n
Returns the value of the element at the given index. Note that the value may be null if the element is null or the type does not match.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_10","title":"Operands:","text":"Operand Description list
list index
index"},{"location":"reference/mlir-dialects/IREEInput/#results_11","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistresize-inputlistresizeop","title":"iree_input.list.resize
(Input::ListResizeOp)","text":"Resizes the list to a new count in elements
Syntax:
operation ::= `iree_input.list.resize` operands attr-dict `:` type($list)\n
Resizes the list to contain new_size
elements. This will either truncate the list if the existing size is greater than new_size
or extend the list with the default list value of the element type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_11","title":"Operands:","text":"Operand Description list
list new_size
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistset-inputlistsetop","title":"iree_input.list.set
(Input::ListSetOp)","text":"Element mutator
Syntax:
operation ::= `iree_input.list.set` $list `[` $index `]` `,` $value attr-dict `:` type($list) `,` type($value)\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_12","title":"Operands:","text":"Operand Description list
list index
index value
any type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputlistsize-inputlistsizeop","title":"iree_input.list.size
(Input::ListSizeOp)","text":"The size of the list in elements
Syntax:
operation ::= `iree_input.list.size` operands attr-dict `:` type($list)\n
Returns the current size of the list in elements.
Interfaces: InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_13","title":"Operands:","text":"Operand Description list
list"},{"location":"reference/mlir-dialects/IREEInput/#results_12","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#pseudo-ops-for-conversion-support","title":"Pseudo ops for conversion support","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorexport-inputtensorexportop","title":"iree_input.tensor.export
(Input::TensorExportOp)","text":"Exports a tensor to a Buffer(View), capturing dynamic dims
Syntax:
operation ::= `iree_input.tensor.export` $source `:` type($source) (`{` $source_dims^ `}`)? `->` type($target)\n attr-dict-with-keyword\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_14","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_13","title":"Results:","text":"Result Description target
Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorimport-inputtensorimportop","title":"iree_input.tensor.import
(Input::TensorImportOp)","text":"Imports a Buffer(View) to a tensor, providing dynamic dims
Syntax:
operation ::= `iree_input.tensor.import` $source `:` type($source) `->` type($target) (`{` $target_dims^ `}`)?\n attr-dict-with-keyword\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_15","title":"Operands:","text":"Operand Description source
Buffer is an untyped bag of bits with no shape or dtype or View into a buffer, with runtime shape and element type target_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_14","title":"Results:","text":"Result Description target
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorbitcast-inputtensorbitcastop","title":"iree_input.tensor.bitcast
(Input::TensorBitCastOp)","text":"Bitcasts a tensor
Syntax:
operation ::= `iree_input.tensor.bitcast` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Bitcasts a tensor to a new shape without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_16","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_15","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorclone-inputtensorcloneop","title":"iree_input.tensor.clone
(Input::TensorCloneOp)","text":"Performs a full tensor clone operation
Syntax:
operation ::= `iree_input.tensor.clone` $operand `:` type($result) (`{` $operand_dims^ `}`)?\n attr-dict-with-keyword\n
Clones the input tensor into an identical output tensor.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_17","title":"Operands:","text":"Operand Description operand
ranked tensor of any type values operand_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_16","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorload-inputtensorloadop","title":"iree_input.tensor.load
(Input::TensorLoadOp)","text":"Loads a value from a tensor element
Syntax:
operation ::= `iree_input.tensor.load` $source (`[` $indices^ `]`)? `:`\n type($source) (`{` $source_dims^ `}`)?\n attr-dict-with-keyword\n
Returns the element at the given location from within the tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_18","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_17","title":"Results:","text":"Result Description result
index or signless integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorreshape-inputtensorreshapeop","title":"iree_input.tensor.reshape
(Input::TensorReshapeOp)","text":"Reshapes a tensor
Syntax:
operation ::= `iree_input.tensor.reshape` $source `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Reshapes a tensor to a new shape without modifying the contents.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_19","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_18","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorslice-inputtensorsliceop","title":"iree_input.tensor.slice
(Input::TensorSliceOp)","text":"Slices out a subregion of a tensor
Syntax:
operation ::= `iree_input.tensor.slice` $source `[` $start_indices `for` $lengths `]` `:`\n type($source) (`{` $source_dims^ `}`)? `->`\n type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Clones a subregion of a tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_20","title":"Operands:","text":"Operand Description source
ranked tensor of any type values source_dims
variadic of index start_indices
variadic of index lengths
variadic of index result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_19","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorsplat-inputtensorsplatop","title":"iree_input.tensor.splat
(Input::TensorSplatOp)","text":"Splats a value into a shaped tensor
Syntax:
operation ::= `iree_input.tensor.splat` $value `:` type($result) (`{` $result_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor initialized to the given primitive value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_21","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type result_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_20","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorstore-inputtensorstoreop","title":"iree_input.tensor.store
(Input::TensorStoreOp)","text":"Stores a value into a tensor element
Syntax:
operation ::= `iree_input.tensor.store` $value `,` $target (`[` $indices^ `]`)? `:`\n type($target) (`{` $target_dims^ `}`)?\n attr-dict-with-keyword\n
Returns a tensor with the element at the given index set to the given value.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_22","title":"Operands:","text":"Operand Description value
index or signless integer or floating-point or complex-type or vector of any type values target
ranked tensor of any type values target_dims
variadic of index indices
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_21","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensortrace-inputtensortraceop","title":"iree_input.tensor.trace
(Input::TensorTraceOp)","text":"Traces one or more tensor values at runtime
Syntax:
operation ::= `iree_input.tensor.trace` $key `=` `[`\n custom<ShapedOperandList>($values, type($values), $value_dims)\n `]` attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.
Traits: AttrSizedOperandSegments
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/IREEInput/#operands_23","title":"Operands:","text":"Operand Description values
variadic of ranked tensor of any type values value_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputtensorupdate-inputtensorupdateop","title":"iree_input.tensor.update
(Input::TensorUpdateOp)","text":"Updates a tensor with the contents of another tensor
Syntax:
operation ::= `iree_input.tensor.update` $update `,` $target `[` $start_indices `]` `:`\n type($update) (`{` $update_dims^ `}`)? `->`\n custom<ShapedTiedResult>(type($result), $target_dims)\n attr-dict-with-keyword\n
Updates the target tensor with the contents of the update tensor at the given offset indices.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_24","title":"Operands:","text":"Operand Description target
ranked tensor of any type values target_dims
variadic of index start_indices
variadic of index update
ranked tensor of any type values update_dims
variadic of index"},{"location":"reference/mlir-dialects/IREEInput/#results_22","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREEInput/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputalign-inputalignop","title":"iree_input.align
(Input::AlignOp)","text":"Aligns up to a power-of-two alignment if required
Syntax:
operation ::= `iree_input.align` $value `,` $alignment attr-dict `:` type($result)\n
Aligns |value| up to the given power-of-two |alignment| if required.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#operands_25","title":"Operands:","text":"Operand Description value
signless-integer-like alignment
signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#results_23","title":"Results:","text":"Result Description result
signless-integer-like"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputnull-inputnullop","title":"iree_input.null
(Input::NullOp)","text":"A null value
Syntax:
operation ::= `iree_input.null` attr-dict `:` type($result)\n
Initializes reference and variant types with a null value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#results_24","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/IREEInput/#workgroup-dispatch-ops","title":"Workgroup dispatch ops","text":""},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupcount-inputdispatchworkgroupcountop","title":"iree_input.dispatch.workgroup.count
(Input::DispatchWorkgroupCountOp)","text":"Returns the total workgroup count of the grid
Syntax:
operation ::= `iree_input.dispatch.workgroup.count` `[` $dimension `]` attr-dict `:` type($result)\n
The total number of workgroups along each dimension in the dispatch grid.
Corresponds to the NumWorkgroups
SPIR-V built-in and the gridDim
CUDA built-in variable, except that in the iree dialect the number of dimensions is not restricted to 3 (XYZ).
%x = iree_input.dispatch.workgroup.count[0] : index\n%y = iree_input.dispatch.workgroup.count[1] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_25","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupid-inputdispatchworkgroupidop","title":"iree_input.dispatch.workgroup.id
(Input::DispatchWorkgroupIDOp)","text":"Returns the index of the current workgroup in the grid
Syntax:
operation ::= `iree_input.dispatch.workgroup.id` `[` $dimension `]` attr-dict `:` type($result)\n
The global workgroup ID of the current workgroup in the range of [0, iree_input.dispatch.workgroup.count)
along each dimension.
Corresponds to the WorkgroupId
SPIR-V built-in and the blockIdx
CUDA built-in variable, except that in the iree dialect the number of dimensions is not restricted to 3 (XYZ).
%x = iree_input.dispatch.workgroup.id[0] : index\n%y = iree_input.dispatch.workgroup.id[1] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_26","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#iree_inputdispatchworkgroupsize-inputdispatchworkgroupsizeop","title":"iree_input.dispatch.workgroup.size
(Input::DispatchWorkgroupSizeOp)","text":"Returns the size of each workgroup in invocations
Syntax:
operation ::= `iree_input.dispatch.workgroup.size` `[` $dimension `]` attr-dict `:` type($result)\n
The number of local invocations within the current workgroup along each dimension. Depending on backend this may map to the SIMT thread count or inner loop nest parameters.
Workgroup sizes are not determined at the iree dialect level as they are dependent on the target backend determined when lowering into the HAL. It's still possible to use the symbolic workgroup size inside of dispatch executables as a placeholder for the resolved value once in the HAL.
Corresponds to the WorkgroupSize
SPIR-V built-in and the blockDim
CUDA built-in variable, except that in the iree dialect the number of dimensions is not restricted to 3 (XYZ).
%x = iree_input.dispatch.workgroup.size[0] : index\n%y = iree_input.dispatch.workgroup.size[1] : index\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREEInput/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/IREEInput/#results_27","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/IREEInput/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetbindingattr","title":"DescriptorSetBindingAttr","text":"descriptor set binding specification
Syntax:
#iree_input.descriptor_set.binding<\n int64_t, # ordinal\n DescriptorType, # type\n std::optional<DescriptorFlags> # flags\n>\n
"},{"location":"reference/mlir-dialects/IREEInput/#parameters","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
type DescriptorType
flags std::optional<DescriptorFlags>
"},{"location":"reference/mlir-dialects/IREEInput/#descriptorsetlayoutattr","title":"DescriptorSetLayoutAttr","text":"descriptor set layout specification
Syntax:
#iree_input.descriptor_set.layout<\n int64_t, # ordinal\n ::llvm::ArrayRef<DescriptorSetBindingAttr>, # bindings\n std::optional<DescriptorSetLayoutFlags> # flags\n>\n
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description ordinal int64_t
bindings ::llvm::ArrayRef<DescriptorSetBindingAttr>
flags std::optional<DescriptorSetLayoutFlags>
"},{"location":"reference/mlir-dialects/IREEInput/#descriptortypeattr","title":"DescriptorTypeAttr","text":"valid DescriptorType
Syntax:
#iree_input.descriptor_type<\n ::mlir::iree_compiler::IREE::Input::DescriptorType # value\n>\n
Enum cases: * uniform_buffer (UniformBuffer
) * storage_buffer (StorageBuffer
)
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::Input::DescriptorType
an enum of type DescriptorType"},{"location":"reference/mlir-dialects/IREEInput/#devicetargetattr","title":"DeviceTargetAttr","text":"generic device target specification
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description deviceID StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectattr","title":"ExecutableObjectAttr","text":"executable object reference
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description path StringAttr
data DenseIntElementsAttr
"},{"location":"reference/mlir-dialects/IREEInput/#executableobjectsattr","title":"ExecutableObjectsAttr","text":"target-specific object file references
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description targets ArrayAttr
targetObjects ArrayAttr
"},{"location":"reference/mlir-dialects/IREEInput/#executabletargetattr","title":"ExecutableTargetAttr","text":"generic executable target specification
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_6","title":"Parameters:","text":"Parameter C++ type Description backend StringAttr
format StringAttr
configuration DictionaryAttr
"},{"location":"reference/mlir-dialects/IREEInput/#pipelinelayoutattr","title":"PipelineLayoutAttr","text":"executable entry point layout specification
Syntax:
#iree_input.pipeline.layout<\n int64_t, # pushConstants\n ::llvm::ArrayRef<DescriptorSetLayoutAttr> # setLayouts\n>\n
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_7","title":"Parameters:","text":"Parameter C++ type Description pushConstants int64_t
setLayouts ::llvm::ArrayRef<DescriptorSetLayoutAttr>
"},{"location":"reference/mlir-dialects/IREEInput/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#list","title":"list","text":"A mutable, resizable list of some type.
"},{"location":"reference/mlir-dialects/IREEInput/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/IREEInput/#buffertype","title":"BufferType","text":"Buffer is an untyped bag of bits with no shape or dtype
Syntax: !iree_input.buffer
Buffers represent an untyped bag of bits that can be reinterpreted depending on a use case using buffer_view
operation. Buffers can be used for packing multiple tensors into the same underlying storage. It is left to higher level code to decide how exactly tensors are laid out in the buffer.
"},{"location":"reference/mlir-dialects/IREEInput/#bufferviewtype","title":"BufferViewType","text":"View into a buffer, with runtime shape and element type
Syntax: !iree_input.buffer_view
BufferViews represent views onto backing IREE runtime Buffer objects, adding runtime shape and element type parameters to the backing buffer. BufferViews are typically accepted and returned at boundaries with external code.
In the runtime and lower level compiler, BufferView's are fully modeled; however, as boundary types, not all features are exposed publicly. Since within compiled tensor programs, it is typical to operate in terms of fully typed tensors, the primary mechanism for getting or using a BufferView at the high level is by casting to/from a tensor. It is left to higher level code to ensure that aliasing rules are enforced at such boundaries.
"},{"location":"reference/mlir-dialects/IREEInput/#bytebuffertype","title":"ByteBufferType","text":"a reference counted byte buffer
Syntax: !iree_input.byte_buffer
A reference counted byte buffer that models a pointer, offset, and length.
"},{"location":"reference/mlir-dialects/IREEInput/#listtype","title":"ListType","text":"A one dimensional list of runtime values
Represents a list of arbitrary type. Primitive types can be expected to be efficiently stored in an unboxed form. Reference types and variants are permitted.
Lists can either be homogeneous, with a fixed element type, or heterogeneous by parameterizing them with a VariantType.
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_8","title":"Parameters:","text":"Parameter C++ type Description elementType ::mlir::Type
A type suitable as an element type of a container"},{"location":"reference/mlir-dialects/IREEInput/#ptrtype","title":"PtrType","text":"Pointer to a concrete type
"},{"location":"reference/mlir-dialects/IREEInput/#parameters_9","title":"Parameters:","text":"Parameter C++ type Description targetType ::mlir::Type
A type suitable as a target type of a pointer"},{"location":"reference/mlir-dialects/IREEInput/#varianttype","title":"VariantType","text":"Represents any legal or reference type in the IREE runtime
Syntax: !iree_input.variant
The variant type is typically used to parameterize container types that can contain any legal primitive, reference or null in the IREE type system.
"},{"location":"reference/mlir-dialects/IREELinalgExt/","title":"IREELinalgExt","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_ext-dialect","title":"'iree_linalg_ext' Dialect","text":"IREE Linalg Extensions.
A dialect designed for experimenting with non-structured operations that cannot be represented efficiently/directly by the Linalg dialect.
- 'iree_linalg_ext' Dialect
- Operation definition
- Data tiling ops
- iree_linalg_ext.pack (LinalgExt::PackOp)
- iree_linalg_ext.set_encoding (LinalgExt::SetEncodingOp)
- iree_linalg_ext.unpack (LinalgExt::UnPackOp)
- iree_linalg_ext.unset_encoding (LinalgExt::UnsetEncodingOp)
- iree_linalg_ext.upper_bound_tile_size (LinalgExt::UpperBoundTileSizeOp)
- Non-structured ops
- iree_linalg_ext.attention (LinalgExt::AttentionOp)
- iree_linalg_ext.fft (LinalgExt::FftOp)
- iree_linalg_ext.reverse (LinalgExt::ReverseOp)
- iree_linalg_ext.scan (LinalgExt::ScanOp)
- iree_linalg_ext.scatter (LinalgExt::ScatterOp)
- iree_linalg_ext.sort (LinalgExt::SortOp)
- iree_linalg_ext.topk (LinalgExt::TopkOp)
- Utility ops
- iree_linalg_ext.transform.do_not_dce_operands (LinalgExt::DoNotDCEOperandsOp)
- iree_linalg_ext.yield (LinalgExt::YieldOp)
- Winograd ops
- iree_linalg_ext.winograd.input_transform (LinalgExt::WinogradInputTransformOp)
- iree_linalg_ext.winograd.output_transform (LinalgExt::WinogradOutputTransformOp)
- Attribute definition
- EncodingAttr
- EncodingRoleAttr
- EncodingUserAttr
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#data-tiling-ops","title":"Data tiling ops","text":"Operations for working with data layouts, padding, encodings, and other properties useful for tiling computations across iteration space dimensions.
"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extpack-linalgextpackop","title":"iree_linalg_ext.pack
(LinalgExt::PackOp)","text":"Pack operation
Syntax:
operation ::= `iree_linalg_ext.pack` attr-dict\n $inputs\n (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?\n (`outer_dims_perm` `=` $outer_dims_perm^)?\n `inner_dims_pos` `=` $inner_dims_pos\n `inner_tiles` `=`\n custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)\n `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n (`->` type($results)^)?\n
The pack operation converts an input
into a tiled and packed layout. The dimensions to be tiled are obtained from inner_dims_pos
and the size of the tile is obtained from inner_tiles
. The dimensions listed in inner_dims_pos
do not need to be contiguous in which case the tile will get transposed. We handle only full tiles if padding_value
is not set; it is UB if the tile does not perfectly divide the dimension. If padding_value
is set, it will pad along high dimensions, i.e., it pads at the bottom and on the right if the input has rank 2, and the result type shape, will be dynamic in any dimension if and only if the input shape is. As optional input, the operation takes outer_dims_perm
that allows to permute the tiled loops.
Example KC_to_KCck:
iree_linalg_ext.pack %arg0 inner_dims_pos = [1, 0]\n inner_tiles = [32, 8] into %arg1 : (memref<128x256xf32> memref<16x8x32x8xf32>)\n
Example NC_to_NCnc:
iree_linalg_ext.pack %arg0 inner_dims_pos = [0, 1]\n inner_tiles = [8, 32] into %arg1 : (memref<128x256xf32> memref<16x8x8x32xf32>)\n
Example KC_to_CKkc iree_linalg_ext.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n inner_tiles = [32, 8] into %arg1 : (memref<128x256xf32> memref<32x4x32x8xf32>)\n
In all cases, dimension at position 0 in the input memref (128) is tiled with a factor of 8, while dimension at position 1 (256) is tiled with a factor of 32. In the KC_to_KCck example, the point loops are interchanged, while in the KC_to_CKkc example the tiled loops.
Example NC_to_NCnc with padding:
iree_linalg_ext.pack %arg padding_value(%pad : f32) inner_dims_pos = [0, 1]\n inner_tiles = [8, 2] into %arg1 : (memref<13x15xf32> memref<2x8x8x2xf32>)\n
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription outer_dims_perm
::mlir::DenseI64ArrayAttri64 dense array attribute inner_dims_pos
::mlir::DenseI64ArrayAttri64 dense array attribute static_inner_tiles
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values inner_tiles
variadic of index padding_value
any type"},{"location":"reference/mlir-dialects/IREELinalgExt/#results","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extset_encoding-linalgextsetencodingop","title":"iree_linalg_ext.set_encoding
(LinalgExt::SetEncodingOp)","text":"Perform pack and pad operation on source
Syntax:
operation ::= `iree_linalg_ext.set_encoding` attr-dict $source `:` type($source) `->` type($result)\n
Operation to assign an encoding to a tensor. The operation does not change the rank or extent of a tensor. Instead it adds an encoding attribute to the tensor type to represent a change in layout.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_1","title":"Operands:","text":"Operand Description source
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_1","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extunpack-linalgextunpackop","title":"iree_linalg_ext.unpack
(LinalgExt::UnPackOp)","text":"Unpack operation
Syntax:
operation ::= `iree_linalg_ext.unpack` attr-dict\n $inputs\n (`outer_dims_perm` `=` $outer_dims_perm^)?\n `inner_dims_pos` `=` $inner_dims_pos\n `inner_tiles` `=`\n custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)\n `into` $outputs `:` `(` type($inputs) type($outputs) `)`\n (`->` type($results)^)?\n
The unpack operation converts a tiled and packed input to an unpacked output. See pack
for more details on inner_tiles
and dims_pos
; it is UB if the tile does not perfectly divide the dimension. Optionally, the operation also supports permuting the tiled loops.
Example KCck_to_KC:
iree_linalg_ext.unpack %arg0 dims_pos = [1, 0]\n inner_tiles = [32, 8] into %arg1 : (memref<16x8x32x8xf32> memref<128x256xf32>)\n
Example NCnc_to_NC:
iree_linalg_ext.unpack %arg0 dims_pos = [0, 1]\n inner_tiles = [8, 32] into %arg1 : (memref<16x8x8x32xf32> memref<128x256xf32>)\n
Example CKkc_to_KC:
iree_linalg_ext.unpack %arg1 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]\n inner_tiles = [32, 8] into %arg0 : (memref<32x4x32x8xf32> memref<128x256xf32>)\n
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription outer_dims_perm
::mlir::DenseI64ArrayAttri64 dense array attribute inner_dims_pos
::mlir::DenseI64ArrayAttri64 dense array attribute static_inner_tiles
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_2","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values inner_tiles
variadic of index"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_2","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extunset_encoding-linalgextunsetencodingop","title":"iree_linalg_ext.unset_encoding
(LinalgExt::UnsetEncodingOp)","text":"Perform unpack and extract operation on source
Syntax:
operation ::= `iree_linalg_ext.unset_encoding` attr-dict $source `:` type($source) `->` type($result)\n
Operation to convert a tensor with encoding that represents its data layout into a tensor with default layout (i.e. no encoding). For now in IREE the default layout is row-major.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), ReifyRankedShapedTypeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_3","title":"Operands:","text":"Operand Description source
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_3","title":"Results:","text":"Result Description result
ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extupper_bound_tile_size-linalgextupperboundtilesizeop","title":"iree_linalg_ext.upper_bound_tile_size
(LinalgExt::UpperBoundTileSizeOp)","text":"Returns an upper bound on tile sizes
Syntax:
operation ::= `iree_linalg_ext.upper_bound_tile_size` attr-dict $tensorType `->` type($results)\n
This returns the largest tile sizes that might result from materialization of the given encoding. This can be used outside of target-specific code, so there may be multiple targets, and this will return the maximum tile size from iterating over all of them. The evaluation happens in the MaterializeUpperBoundTileSize pass.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription tensorType
::mlir::TypeAttrtype attribute of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_4","title":"Results:","text":"Result Description results
variadic of index"},{"location":"reference/mlir-dialects/IREELinalgExt/#non-structured-ops","title":"Non-structured ops","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extattention-linalgextattentionop","title":"iree_linalg_ext.attention
(LinalgExt::AttentionOp)","text":"Attention operator
Syntax:
operation ::= `iree_linalg_ext.attention` attr-dict\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n (`->` type($results)^)?\n
This operator takes in 3 tensors: query(Q), key(K) and value(V) and computes the attention. For self-attention, all inputs have the same shape BxNxd where B is the batch dimension, N is the sequence length and d is head dimension. Typically N >>> d. Mathematically, the attention is defined as matmul(softmax(matmul(Q, transpose(K))), V) and has shape BxNxd. Usually, this operator also performs scaling, masking and dropout, but we leave that out of the current implementation. For cross-attention, the query and output have the same shape (BxNxd), while the key and value differ in sequence length (they have shape BxLxd, where L != N). This operator after tiling results in a tiled result as per flash attention and results in the current max
and sum
statistics while processing the current tile.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_4","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_5","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extfft-linalgextfftop","title":"iree_linalg_ext.fft
(LinalgExt::FftOp)","text":"Fft operator
Syntax:
operation ::= `iree_linalg_ext.fft` attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n `outs` `(` $outputs `:` type($outputs) `)`\n (`:` type($results)^)?\n
Apply 1D FFT to innermost dim. This is an iterative FFT, not recursive. Thus, the bit reversal is assumed applied on the input. The op carries an input -- stage, which indicates the level of reduction loop in the algorithm. It represents the computation body. For more details, see \"Data reordering, bit reversal, and in-place algorithms\" section in https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm
The size of innermost dim is expected to be a power of 2.
It is optional to carry coefficient tensors/buffers as inputs. In this context, they will be the second and third inputs.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_5","title":"Operands:","text":"Operand Description inputs
variadic of any type outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_6","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extreverse-linalgextreverseop","title":"iree_linalg_ext.reverse
(LinalgExt::ReverseOp)","text":"Reverse operator
Syntax:
operation ::= `iree_linalg_ext.reverse` attr-dict `dimensions` `(` $dimensions `)`\n (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n (`outs` `(` $outputs^ `:` type($outputs) `)`)?\n (`:` type($results)^)?\n
A temporary solution for lowering reverse ops into IREE, allowing IREE to tile and distribute them.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription dimensions
::mlir::DenseIntElementsAttr64-bit signless integer elements attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_6","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_7","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extscan-linalgextscanop","title":"iree_linalg_ext.scan
(LinalgExt::ScanOp)","text":"Scan operator
Syntax:
operation ::= `iree_linalg_ext.scan` attr-dict\n `dimension` `(` $dimension `)`\n `inclusive` `(` $inclusive `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
Computes the inclusive/exclusive scan along a given dimension.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttr64-bit signless integer attribute inclusive
::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_7","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_8","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extscatter-linalgextscatterop","title":"iree_linalg_ext.scatter
(LinalgExt::ScatterOp)","text":"Scatter operator
Syntax:
operation ::= `iree_linalg_ext.scatter` attr-dict `dimension_map` `=` $dimension_map\n `unique_indices` `(` $unique_indices `)`\n (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
Based on XLA operation semantics, takes two inputs
(update
and indices
) and outputs
value (original
). The operation updates the value at the slices specified by indices
by combining the current value with the value in updates
using the computation specified in region
. The region
specifies a binary operation of signature (T, T) -> T, where T
is the element-type of updates
(and original
). The first argument correspond the value to be updated (i.e. from updates
), and the second the current value (i.e. value from original
).
The indices
is a 2D tensor/memref type. The first dim is the number of updates, and the second dim is index depth. The index depth should always be static.
The first dim of updates
and indices
is identical, since they represent the number of updates.
The rank of the original
/result
is at least index_depth + rank(%updates) - 1
. The first index_depth
indices are derived from indices
and the shape of update value has the last rank(%original) - index_depth values match %(originals) last dimensions, with the previous dims extending from the index offsets.
The dimension_map attribute describes which index value maps to which dimension in the destination. It cannot contain duplicate values, must have as many entries as index depth, and values must be within the rank of the destination.
The unique_indices attribute carries the information whether all the indices are unique. If there are repeated indices, the first iteration loop will be marked as reduction.
The shapes definition follows tensorflow operations except that it forces batch dims to be 1D. See more information in https://www.tensorflow.org/api_docs/python/tf/tensor_scatter_nd_update
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension_map
::mlir::DenseI64ArrayAttri64 dense array attribute unique_indices
::mlir::BoolAttrbool attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_8","title":"Operands:","text":"Operand Description inputs
variadic of ranked tensor or memref of any type values outputs
variadic of ranked tensor or memref of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_9","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extsort-linalgextsortop","title":"iree_linalg_ext.sort
(LinalgExt::SortOp)","text":"Sort operator
Syntax:
operation ::= `iree_linalg_ext.sort` attr-dict\n `dimension` `(` $dimension `)`\n (`ins` `(` $inputs^ `:` type($inputs) `)`)?\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
Based on XLA operation semantics, sorts the given operands
at the given dimension
with the given comparator
.
See https://www.tensorflow.org/xla/operation_semantics#sort.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_9","title":"Operands:","text":"Operand Description inputs
variadic of any type outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_10","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_exttopk-linalgexttopkop","title":"iree_linalg_ext.topk
(LinalgExt::TopkOp)","text":"Top-K operator
Syntax:
operation ::= `iree_linalg_ext.topk` attr-dict\n `dimension` `(` $dimension `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n $region (`->` type($results)^)?\n
A Top-K operation for N-D tensors. Reduces the target dimension from the input size N down to K elements based on the supplied binary region.
Accepts an N-D tensor input consisting of values and an optional N-D tensor for indices of those values (i32 type). If input indices aren't provided, the index mapping is inferred based on the k dim. Both input values/indices tensors and output values/indices tensors must have the same shape. Top-K is computed along the target dimension (from dimension()). Returns two output tensors of values and the indices of Top-K results. The output dimensions must match the input save for the dimension that is reduced to K results.
Region accepts lhs=[next N input] and rhs=[existing K output] and yields an i1. If true, the two values are swapped: - For Top-K comparison: > - For Min-K comparison: < Note: when the two values are equal, the first occurrence is always selected.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, LinalgExtOp, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription dimension
::mlir::IntegerAttr64-bit signless integer attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_10","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_11","title":"Results:","text":"Result Description results
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_exttransformdo_not_dce_operands-linalgextdonotdceoperandsop","title":"iree_linalg_ext.transform.do_not_dce_operands
(LinalgExt::DoNotDCEOperandsOp)","text":"Unfoldable op that just keeps its operands live
Syntax:
operation ::= `iree_linalg_ext.transform.do_not_dce_operands` attr-dict $operands `:` type($operands)\n
Unfoldable op that just keeps its operands live. This is to use with the transform dialect in case where transforms introduce IR that would be otherwise DCE'd by canonicalizations.
This op should be added to the transform dialect in the fullness of time but it can't be registered dynamically on the IREE side as that triggers errors since the op does not implement any transform interface.
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_11","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extyield-linalgextyieldop","title":"iree_linalg_ext.yield
(LinalgExt::YieldOp)","text":"LinalgExt yield op
Syntax:
operation ::= `iree_linalg_ext.yield` attr-dict ($operands^ `:` type($operands))?\n
iree_linalg_ext.yield
is a special terminator operation for blocks inside regions in iree_linalg_ext
ops.
Traits: AlwaysSpeculatableImplTrait, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_12","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/IREELinalgExt/#winograd-ops","title":"Winograd ops","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extwinogradinput_transform-linalgextwinogradinputtransformop","title":"iree_linalg_ext.winograd.input_transform
(LinalgExt::WinogradInputTransformOp)","text":"Winograd Input Transform operator
Syntax:
operation ::= `iree_linalg_ext.winograd.input_transform` attr-dict\n `output_tile_size` `(` $output_tile_size `)`\n `kernel_size` `(` $kernel_size `)`\n `image_dimensions` `(` $image_dimensions `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n (`->` type($result)^)?\n
This operator is the first step in converting a convolution to its Winograd equivalent. Given a tile of an input image (I), this operator computes matmul(transpose(B), matmul(I, B)). The input tile is assumed to be square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. B is a constant 2-d square matrix of the same shape as the input tile I. The input to the operator is an image of shape (N, H, W, C) or (N, C, H, W) and the output is an operator of shape (m + r - 1, m + r - 1, N, H', W', C) where H' = ceil((H - m + 1)/r) and W' = ceil((W - m + 1)/r). The result of this operator is first collapsed and then fed to a batch matmul op.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription output_tile_size
::mlir::IntegerAttr64-bit signless integer attribute kernel_size
::mlir::IntegerAttr64-bit signless integer attribute image_dimensions
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_13","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_12","title":"Results:","text":"Result Description result
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#iree_linalg_extwinogradoutput_transform-linalgextwinogradoutputtransformop","title":"iree_linalg_ext.winograd.output_transform
(LinalgExt::WinogradOutputTransformOp)","text":"Winograd Output Transform operator
Syntax:
operation ::= `iree_linalg_ext.winograd.output_transform` attr-dict\n `output_tile_size` `(` $output_tile_size `)`\n `kernel_size` `(` $kernel_size `)`\n `image_dimensions` `(` $image_dimensions `)`\n `ins` `(` $inputs `:` type($inputs) `)`\n `outs` `(` $outputs `:` type($outputs) `)`\n (`->` type($result)^)?\n
This operator is the last transform in converting a convolution to its Winograd equivalent. After convolution in the Winograd domain (which turns into an elementwise product for a single channel and batch matrix multiplication for many channels), this operator converts the output back into the original domain. Given a tile of the output (O) in the Winograd domain, this operator computes matmul(transpose(A), matmul(O, A)). The output tile is square with each side of size m + r - 1, where the convolutional kernel is m x m and the output tile size is r x r. A is a constant 2-d matrix of shape (m + r - 1) x r. The input to the operator is a tensor of shape (m + r - 1, m + r - 1, N, H', W', C) and the output is a tensor of shape (N, H, W, C) or (N, C, H, W) where H = r H' and W = r W'. This operator is followed by a tensor.extract_slice which extracts only the non-padded part of the output.
Traits: AttrSizedOperandSegments, SingleBlock, SingleBlockImplicitTerminator<::mlir::iree_compiler::IREE::LinalgExt::YieldOp>
Interfaces: DestinationStyleOpInterface, LinalgExtInterface, MemoryEffectOpInterface, ReifyRankedShapedTypeOpInterface, TilingInterface
"},{"location":"reference/mlir-dialects/IREELinalgExt/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription output_tile_size
::mlir::IntegerAttr64-bit signless integer attribute kernel_size
::mlir::IntegerAttr64-bit signless integer attribute image_dimensions
::mlir::DenseI64ArrayAttri64 dense array attribute"},{"location":"reference/mlir-dialects/IREELinalgExt/#operands_14","title":"Operands:","text":"Operand Description inputs
variadic of shaped of any type values outputs
variadic of shaped of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#results_13","title":"Results:","text":"Result Description result
variadic of ranked tensor of any type values"},{"location":"reference/mlir-dialects/IREELinalgExt/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/IREELinalgExt/#encodingattr","title":"EncodingAttr","text":"information to decide how to data-tile a tensor
Syntax:
#iree_linalg_ext.encoding<\n EncodingUserAttr, # user\n EncodingRoleAttr, # role\n ArrayAttr, # element_types\n TypeAttr, # original_type\n IntegerAttr, # matmul_narrow_M\n IntegerAttr # matmul_narrow_N\n>\n
This attribute describes the change in the layout for a given tensor to execute subsequent operations on the tiled layout. The encoding serves as a way to represent the change in the way the data is laid out in memory without changing the logical rank/extent of the tensor itself. When required, the encoding can be used to explicitly manifest the layout change through operations like pack/unpack.
"},{"location":"reference/mlir-dialects/IREELinalgExt/#parameters","title":"Parameters:","text":"Parameter C++ type Description user EncodingUserAttr
kind of operation using this tensor role EncodingRoleAttr
role of this tensor as an operand element_types ArrayAttr
element types of the user's operands original_type TypeAttr
type of the original tensor type before padding matmul_narrow_M IntegerAttr
optional M narrow dimension size (only for MATMUL and BATCH_MATMUL users) matmul_narrow_N IntegerAttr
optional N narrow dimension size (only for MATMUL and BATCH_MATMUL users)"},{"location":"reference/mlir-dialects/IREELinalgExt/#encodingroleattr","title":"EncodingRoleAttr","text":"Describes the role of the tensor as an operand or a result of an operation.
Syntax:
#iree_linalg_ext.role<\n ::mlir::iree_compiler::IREE::LinalgExt::EncodingRole # value\n>\n
Enum cases: * LHS (LHS
) * RHS (RHS
) * RESULT (RESULT
)
"},{"location":"reference/mlir-dialects/IREELinalgExt/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::LinalgExt::EncodingRole
an enum of type EncodingRole"},{"location":"reference/mlir-dialects/IREELinalgExt/#encodinguserattr","title":"EncodingUserAttr","text":"Describes the operation that a tensor is an operand or a result of.
Syntax:
#iree_linalg_ext.user<\n ::mlir::iree_compiler::IREE::LinalgExt::EncodingUser # value\n>\n
Enum cases: * MATMUL (MATMUL
) * BATCH_MATMUL (BATCH_MATMUL
)
"},{"location":"reference/mlir-dialects/IREELinalgExt/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description value ::mlir::iree_compiler::IREE::LinalgExt::EncodingUser
an enum of type EncodingUser"},{"location":"reference/mlir-dialects/IREEVectorExt/","title":"IREEVectorExt","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_ext-dialect","title":"'iree_vector_ext' Dialect","text":"IREE Vector Extensions.
A dialect designed for experimenting with vector operations beyond what is currently available in the Vector Dialect.
- 'iree_vector_ext' Dialect
- Operation definition
- iree_vector_ext.layout_conflict_resolution (VectorExt::LayoutConflictResolutionOp)
- Attribute definition
- LayoutAttr
- PerDimLayoutAttr
"},{"location":"reference/mlir-dialects/IREEVectorExt/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#iree_vector_extlayout_conflict_resolution-vectorextlayoutconflictresolutionop","title":"iree_vector_ext.layout_conflict_resolution
(VectorExt::LayoutConflictResolutionOp)","text":"Layout Conflict Resolution operator
Syntax:
operation ::= `iree_vector_ext.layout_conflict_resolution` $input attr-dict `:` type($input) `->` type($output)\n
The layout conflict resolution operator takes a vector and a desired layout and transforms the vector to one with the desired layout.
"},{"location":"reference/mlir-dialects/IREEVectorExt/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription sourceLayout
::mlir::iree_compiler::IREE::VectorExt::LayoutAttrhigh-dimensional vector register layout for a given vector desiredLayout
::mlir::iree_compiler::IREE::VectorExt::LayoutAttrhigh-dimensional vector register layout for a given vector"},{"location":"reference/mlir-dialects/IREEVectorExt/#operands","title":"Operands:","text":"Operand Description input
vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#results","title":"Results:","text":"Result Description output
vector of any type values"},{"location":"reference/mlir-dialects/IREEVectorExt/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/IREEVectorExt/#layoutattr","title":"LayoutAttr","text":"high-dimensional vector register layout for a given vector
This contains a complete specification of the layout for a given vector, whereas the attribute above only specifies the per dimension layout.
"},{"location":"reference/mlir-dialects/IREEVectorExt/#parameters","title":"Parameters:","text":"Parameter C++ type Description layouts ::llvm::ArrayRef<PerDimLayoutAttr>
layout for each dimension of the vector"},{"location":"reference/mlir-dialects/IREEVectorExt/#perdimlayoutattr","title":"PerDimLayoutAttr","text":"high-dimensional vector register layout for a given vector dimension
This attribute describes the per dimension register layout for a given vector that could be prescribed by an operator such as matrix multiplication. This is a way to explicitly represent the layout in the IR when it is in the SIMD form prior to converting to the SIMT form so that we can reason about layouts, propagating layouts and layout conflicts.
"},{"location":"reference/mlir-dialects/IREEVectorExt/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description labels ::llvm::ArrayRef<std::string>
labels for the high dimensional layout dims shapes ::llvm::ArrayRef<int64_t>
shapes for the high dimensional layout dims"},{"location":"reference/mlir-dialects/Stream/","title":"Stream","text":""},{"location":"reference/mlir-dialects/Stream/#stream-dialect","title":"'stream' Dialect","text":"A dialect designed to model execution partitioning and scheduling.
The stream dialect is designed to take tensor programs and convert them to explicitly scheduled asynchronous programs. This includes placing ops on specific targets, partitioning the work between the targets, scheduling the work for concurrency, and encoding tensors into target-specific resources.
+--------+ +----------+ +-------+\n| flow.* | -> | stream.* | -> | hal.* |\n+--------+ +----------+ +-------+\n
This sits in-between the flow
and hal
dialects.
-
flow
models tensor programs by separating work into dispatchable functions in order to isolate the main host program data flow and the dense tensor compute operations.
-
stream
models explicitly scheduled asynchronous programs by partitioning the dispatchable work, specifying target affinities, encoding tensors into target-specific forms, and scheduling the work to run concurrently.
-
hal
models a low-level hardware abstraction layer used to manage buffers and issue asynchronous work across a variety of device types. The dialect is largely 1:1 with the IREE HAL C API.
Transforms in the dialect lower tensor values into opaque resources with the goal of ensuring no tensors survive in the IR. At entry stream.tensor.*
ops are used to capture the source tensor encoding information (data type, shapes, etc) and then lowered into stream.async.*
ops that model the asynchronous workloads on the opaque resources. The asynchronous operations are then partitioned, allocated, and scheduled for execution using the stream.cmd.*
ops.
It's intended that after transformation through the stream dialect the program is ready for execution on an abstract machine. At this level of representation buffers have still not been allocated and devices are not yet resolved, however the information captured in the stream
IR allows such operations to be done trivially. To this end all ops carry the symbolic size of the resources on which they operate as well as the lifetime of the resources they are acting upon. This manifests in the usage of the !stream.resource
type:
// Unresolved lifetime (resolved during the iree-stream-refine-usage pass):\n!stream.resource<*>\n// An externally managed value (passed in via the program API).\n!stream.resource<external>\n// A staging buffer for uploads/downloads.\n!stream.resource<staging>\n// A short-lived value that is used across streams.\n!stream.resource<transient>\n// A long-lived value that persists across streams in globals.\n!stream.resource<variable>\n// An immutable value that persists for the duration of the program.\n!stream.resource<constant>\n
Operations using resources carry the size of all operand and result resources:
// %update (40 bytes) is being inserted into %target (296 bytes).\n// Can be dynamic values such as those originating from dynamic dimensions.\n%13 = stream.async.update %update, %target[%c256 to %c296] :\n !stream.resource<transient>{%c40} ->\n %target as !stream.resource<transient>{%c296}\n
Once all stream.async.*
work is moved into executable regions (such as stream.async.execute
) !stream.timepoint
values are used to sequence the execution. These timepoints represent some point in time where all execution up to that timepoint has completed and any results that were produced by the execution are available for use. Attempting to use the resources before their corresponding timepoint has been reached will lead to undefined behavior. The benefit of this is that after timepoints are established in the IR it's possible to induce aliasing of resources without breaking execution correctness.
- 'stream' Dialect
- Operation definition
- Async control flow ops
- stream.async.call (Stream::AsyncCallOp)
- stream.async.concurrent (Stream::AsyncConcurrentOp)
- stream.async.execute (Stream::AsyncExecuteOp)
- stream.async.func (Stream::AsyncFuncOp)
- Channel ops
- stream.channel.count (Stream::ChannelCountOp)
- stream.channel.create (Stream::ChannelCreateOp)
- stream.channel.rank (Stream::ChannelRankOp)
- stream.channel.split (Stream::ChannelSplitOp)
- Executable ops
- stream.binding.subspan (Stream::BindingSubspanOp)
- stream.executable.end (Stream::ExecutableEndOp)
- stream.executable.export (Stream::ExecutableExportOp)
- stream.executable (Stream::ExecutableOp)
- Execution context ops
- stream.context.resolve (Stream::ContextResolveOp)
- Explicit command ops
- stream.cmd.call (Stream::CmdCallOp)
- stream.cmd.collective (Stream::CmdCollectiveOp)
- stream.cmd.concurrent (Stream::CmdConcurrentOp)
- stream.cmd.copy (Stream::CmdCopyOp)
- stream.cmd.discard (Stream::CmdDiscardOp)
- stream.cmd.dispatch (Stream::CmdDispatchOp)
- stream.cmd.execute (Stream::CmdExecuteOp)
- stream.cmd.fill (Stream::CmdFillOp)
- stream.cmd.flush (Stream::CmdFlushOp)
- stream.cmd.func (Stream::CmdFuncOp)
- stream.cmd.invalidate (Stream::CmdInvalidateOp)
- stream.cmd.serial (Stream::CmdSerialOp)
- File ops
- stream.file.constant (Stream::FileConstantOp)
- stream.file.read (Stream::FileReadOp)
- stream.file.write (Stream::FileWriteOp)
- Miscellaneous ops
- stream.return (Stream::ReturnOp)
- stream.yield (Stream::YieldOp)
- Pseudo Ops
- stream.tensor.export (Stream::TensorExportOp)
- stream.tensor.import (Stream::TensorImportOp)
- Resource ops
- stream.resource.alloc (Stream::ResourceAllocOp)
- stream.resource.alloca (Stream::ResourceAllocaOp)
- stream.resource.constants (Stream::ResourceConstantsOp)
- stream.resource.dealloca (Stream::ResourceDeallocaOp)
- stream.resource.load (Stream::ResourceLoadOp)
- stream.resource.pack (Stream::ResourcePackOp)
- stream.resource.size (Stream::ResourceSizeOp)
- stream.resource.store (Stream::ResourceStoreOp)
- stream.resource.subview (Stream::ResourceSubviewOp)
- stream.resource.try_map (Stream::ResourceTryMapOp)
- Resource parameter I/O ops
- stream.parameter.gather (Stream::ParameterGatherOp)
- stream.parameter.load (Stream::ParameterLoadOp)
- stream.parameter.read (Stream::ParameterReadOp)
- stream.parameter.scatter (Stream::ParameterScatterOp)
- stream.parameter.write (Stream::ParameterWriteOp)
- Resource transfer ops
- stream.async.alloca (Stream::AsyncAllocaOp)
- stream.async.clone (Stream::AsyncCloneOp)
- stream.async.collective (Stream::AsyncCollectiveOp)
- stream.async.constant (Stream::AsyncConstantOp)
- stream.async.copy (Stream::AsyncCopyOp)
- stream.async.dispatch (Stream::AsyncDispatchOp)
- stream.async.fill (Stream::AsyncFillOp)
- stream.async.load (Stream::AsyncLoadOp)
- stream.async.slice (Stream::AsyncSliceOp)
- stream.async.splat (Stream::AsyncSplatOp)
- stream.async.store (Stream::AsyncStoreOp)
- stream.async.transfer (Stream::AsyncTransferOp)
- stream.async.update (Stream::AsyncUpdateOp)
- Synchronization ops
- stream.timepoint.await (Stream::TimepointAwaitOp)
- stream.timepoint.barrier (Stream::TimepointBarrierOp)
- stream.timepoint.chain_external (Stream::TimepointChainExternalOp)
- stream.timepoint.export (Stream::TimepointExportOp)
- stream.timepoint.immediate (Stream::TimepointImmediateOp)
- stream.timepoint.import (Stream::TimepointImportOp)
- stream.timepoint.join (Stream::TimepointJoinOp)
- Tensor ops
- stream.tensor.clone (Stream::TensorCloneOp)
- stream.tensor.constant (Stream::TensorConstantOp)
- stream.tensor.empty (Stream::TensorEmptyOp)
- stream.tensor.fill (Stream::TensorFillOp)
- stream.tensor.load (Stream::TensorLoadOp)
- stream.tensor.sizeof (Stream::TensorSizeOfOp)
- stream.tensor.slice (Stream::TensorSliceOp)
- stream.tensor.splat (Stream::TensorSplatOp)
- stream.tensor.store (Stream::TensorStoreOp)
- stream.tensor.trace (Stream::TensorTraceOp)
- stream.tensor.update (Stream::TensorUpdateOp)
- Attribute definition
- CollectiveAttr
- NamedParameterAttr
- PartitioningConfigAttr
- ResourceConfigAttr
- TimepointAttr
- Type constraint definition
- constant resource
- external resource
- staging resource
- transient resource
- resource
- variable resource
- Type definition
- BindingType
- ChannelType
- FileType
- ResourceType
- TimepointType
"},{"location":"reference/mlir-dialects/Stream/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Stream/#async-control-flow-ops","title":"Async control flow ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasynccall-streamasynccallop","title":"stream.async.call
(Stream::AsyncCallOp)","text":"Calls a streamable external host function
Syntax:
operation ::= `stream.async.call` (`on` `(` $affinity^ `)`)?\n $callee ``\n custom<DispatchOperands>($resource_operands,\n $resource_operand_offsets,\n $resource_operand_ends,\n $resource_operand_lengths) attr-dict `:`\n custom<ShapedFunctionType>(ref($resource_operands),\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands)\n
Calls a function taking/returning resource values with stream semantics. Asynchronous calls must have no side-effects.
Note that returned resources must have their sizes declared prior to the call as this is what allows the call to be made on the stream. If external host logic is required to compute the size (avoid at all costs!) a separate func.call can be used outside of the stream to do so. If sizes are unknowable until the operation is performed, it should be made as a normal asynchronous host call with 'coarse-fences' instead.
Traits: AttrSizedOperandSegments, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, CallOpInterface, Stream_AffinityOp, Stream_StreamableOp, SymbolUserOpInterface, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
::mlir::FlatSymbolRefAttrflat symbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type or any type resource_operand_sizes
variadic of index resource_operand_offsets
variadic of index resource_operand_ends
variadic of index resource_operand_lengths
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamasyncconcurrent-streamasyncconcurrentop","title":"stream.async.concurrent
(Stream::AsyncConcurrentOp)","text":"Executes all ops concurrently
Syntax:
operation ::= `stream.async.concurrent` (`on` `(` $affinity^ `)`)?\n `with` ``\n custom<ResourceRegion>($resource_operands,\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands, $body)\n attr-dict-with-keyword\n
Represents a wave of work scheduled concurrently (each op executing at the same time). All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution.
Waves can be nested to create a DAG. For example, take the following graph:
|\n v---------+---------v\n+-------|-------+ +-------|-------+\n| v--+--v | | v--+--v |\n| +----+ +----+ | | +----+ +----+ |\n| | %a | | %b | | | | %c | | %d | |\n| +----+ +----+ | | +----+ +----+ |\n| +--v--+ | | +--v--+ |\n+-------|-------+ +-------|-------+\n +---------v---------+\n |\n
Represented with nested waves:
%0 = stream.async.concurrent with(%arg) -> ... {\n %1 = stream.async.concurrent with(%arg as %arg0) -> ... {\n %a = ...\n %b = ...\n stream.yield %a, %b\n }\n %2 = stream.async.concurrent with(%arg as %arg1) -> ... {\n %c = ...\n %d = ...\n stream.yield %c, %d\n }\n stream.yield %1, %2\n }\n
Traits: AttrSizedOperandSegments, HasParent, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ClosureOpInterface, RegionBranchOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_1","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_1","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncexecute-streamasyncexecuteop","title":"stream.async.execute
(Stream::AsyncExecuteOp)","text":"Executes a dependency-aware sequence of streamable ops
Syntax:
operation ::= `stream.async.execute` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `with` ``\n custom<ResourceRegion>($resource_operands,\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands, $body)\n `=` `` `>` type($result_timepoint)\n attr-dict-with-keyword\n
Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested stream.async.concurrent
ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.
Traits: AttrSizedOperandSegments, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ClosureOpInterface, RegionBranchOpInterface, Stream_AffinityOp, Stream_TimelineOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_2","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index result_sizes
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_2","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamasyncfunc-streamasyncfuncop","title":"stream.async.func
(Stream::AsyncFuncOp)","text":"Streamable function declaration
Syntax:
operation ::= `stream.async.func` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n ``\n custom<ShapedFunctionSignature>($function_type,\n $tied_operands,\n $arg_attrs,\n $res_attrs)\n attr-dict-with-keyword\n ($body^)?\n
Declares a function that can be called as an asynchronous streaming operation via stream.async.call
. Today only external functions are allowed.
Traits: IsolatedFromAbove, Stream_AsyncPhaseOp
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type tied_operands
::mlir::ArrayAttr64-bit integer array attribute sym_visibility
::mlir::StringAttrstring attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#channel-ops","title":"Channel ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamchannelcount-streamchannelcountop","title":"stream.channel.count
(Stream::ChannelCountOp)","text":"Returns the total number of participants in the group
Syntax:
operation ::= `stream.channel.count` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the total participant count in the collective communicator group.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_3","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_3","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Stream/#streamchannelcreate-streamchannelcreateop","title":"stream.channel.create
(Stream::ChannelCreateOp)","text":"Creates a new channel for collective communication
Syntax:
operation ::= `stream.channel.create` (`on` `(` $affinity^ `)`)?\n (`id` `(` $id^ `)`)?\n (`group` `(` $group^ `)`)?\n (`rank` `(` $rank^ `)`)?\n (`count` `(` $count^ `)`)?\n `:` type($result)\n attr-dict-with-keyword\n
Returns a new channel with the given rank associated with the specified affinity. Collective operations using this channel must only be submitted on compatible affinities.
The group and ID are optional and may be null. The rank and count can be omitted to indicate a default inherited from the environment or device configuration at runtime.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription group
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_4","title":"Operands:","text":"Operand Description id
a reference counted byte buffer rank
index count
index"},{"location":"reference/mlir-dialects/Stream/#results_4","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#streamchannelrank-streamchannelrankop","title":"stream.channel.rank
(Stream::ChannelRankOp)","text":"Returns the rank of the local participant in the group
Syntax:
operation ::= `stream.channel.rank` $channel `:` type($result)\n attr-dict-with-keyword\n
Returns the rank the channel represents as a participant in a collective group in [0, count)
.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_5","title":"Operands:","text":"Operand Description channel
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#results_5","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Stream/#streamchannelsplit-streamchannelsplitop","title":"stream.channel.split
(Stream::ChannelSplitOp)","text":"Splits a collective communication channel
Syntax:
operation ::= `stream.channel.split` $channel `,` $color `,` $key\n `:` type($channel) `->` type($result)\n attr-dict-with-keyword\n
Partitions the group associated with the given channel into disjoint subgroups for each unique value of color. Each new subgroup contains all participants of the same color and within each subgroup the key argument is used to define the rank order. When multiple participants in a group use the same key the tie will be broken using their rank in the parent group. A color of -1 indicates that the rank does not participate in any subgroup and will return a null channel.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_6","title":"Operands:","text":"Operand Description channel
a collective communication channel color
index key
index"},{"location":"reference/mlir-dialects/Stream/#results_6","title":"Results:","text":"Result Description result
a collective communication channel"},{"location":"reference/mlir-dialects/Stream/#executable-ops","title":"Executable ops","text":""},{"location":"reference/mlir-dialects/Stream/#streambindingsubspan-streambindingsubspanop","title":"stream.binding.subspan
(Stream::BindingSubspanOp)","text":"Returns an alias to a subspan of interface binding data
Syntax:
operation ::= `stream.binding.subspan` $binding `` `[` $byte_offset `]`\n attr-dict `:` type($binding) `->` type($result) (`{` $dynamic_dims^ `}`)?\n
Returns a subview to a tensor or memref-like type from a binding. The same binding may have multiple subviews at different byte offsets.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_7","title":"Operands:","text":"Operand Description binding
a managed resource binding into an executable scope byte_offset
index dynamic_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_7","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Stream/#streamexecutableend-streamexecutableendop","title":"stream.executable.end
(Stream::ExecutableEndOp)","text":"Terminator pseudo-op for the executable op
Syntax:
operation ::= `stream.executable.end` attr-dict\n
Traits: HasParent, Terminator"},{"location":"reference/mlir-dialects/Stream/#streamexecutableexport-streamexecutableexportop","title":"stream.executable.export
(Stream::ExecutableExportOp)","text":"
Defines an executable entry point for dispatch operations
Syntax:
operation ::= `stream.executable.export` custom<SymbolVisibility>($sym_visibility)\n custom<SymbolAlias>($sym_name, $function_ref)\n custom<WorkgroupCountRegion>($workgroup_count)\n attr-dict-with-keyword\n
Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal function.
Each entry point can have a unique workgroup count calculation region. This region takes the workload parameters passed to each flow.dispatch and produces an XYZ workgroup count for the 3D grid dispatch.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute function_ref
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Stream/#streamexecutable-streamexecutableop","title":"stream.executable
(Stream::ExecutableOp)","text":"Generic executable module
Syntax:
operation ::= `stream.executable` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
An executable module containing one or more public functions. The contents of the functions are safe to dispatch and can be lowered further to target-specific backend IR representations.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Stream/#execution-context-ops","title":"Execution context ops","text":"Operations for interacting with the execution context that stream operations execute within.
"},{"location":"reference/mlir-dialects/Stream/#streamcontextresolve-streamcontextresolveop","title":"stream.context.resolve
(Stream::ContextResolveOp)","text":"Resolves low-level context resources based on type
Syntax:
operation ::= `stream.context.resolve` (`on` `(` $affinity^ `)`)?\n attr-dict `:` type($results)\n
WIP; allows for accessing the implementation details of lower-level dialects such as the HAL. This will likely be reworked in the future to either live inside other dialects, use some op interface instead of having a dedicated op here, or remove the op entirely and make resolution happen explicitly.
Examples:
// Returns a HAL device.\n= stream.context.resolve on(#something) : !hal.device\n// Returns a HAL device and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.device, i64\n// Returns a HAL allocator and (optional) queue affinity.\n= stream.context.resolve on(#something) : !hal.allocator, i64\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#results_8","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#explicit-command-ops","title":"Explicit command ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamcmdcall-streamcmdcallop","title":"stream.cmd.call
(Stream::CmdCallOp)","text":"Calls a streamable external host function
Syntax:
operation ::= `stream.cmd.call` $callee ``\n custom<CmdCallOperands>($resource_operands,\n $resource_operand_offsets,\n $resource_operand_lengths,\n $resource_operand_accesses) attr-dict `:`\n custom<ShapedFunctionType>(ref($resource_operands),\n type($resource_operands),\n $resource_operand_sizes,\n type($results),\n $result_sizes,\n $tied_operands)\n
Calls a function operating on resource values with stream semantics. Asynchronous calls must have no side-effects.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: CallOpInterface, Stream_StreamableOp, Stream_SubviewEffectOp, SymbolUserOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
::mlir::FlatSymbolRefAttrflat symbol reference attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute resource_operand_accesses
::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_8","title":"Operands:","text":"Operand Description resource_operands
variadic of index or integer or floating-point or complex-type or resource or external resource or transient resource or variable resource or constant resource or any type resource_operand_sizes
variadic of index resource_operand_offsets
variadic of index resource_operand_lengths
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_9","title":"Results:","text":"Result Description results
variadic of index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#streamcmdcollective-streamcmdcollectiveop","title":"stream.cmd.collective
(Stream::CmdCollectiveOp)","text":"Dispatches a collective operation
Syntax:
operation ::= `stream.cmd.collective` `` $op `` `[` $element_count `]`\n `channel` `(` $channel `)`\n (`param` `(` $param^ `:` type($param) `)`)? `{`\n custom<DispatchResources>($resources, type($resources), $resource_sizes,\n $resource_offsets, $resource_lengths,\n $resource_accesses)\n `\\n` `}`\n attr-dict-with-keyword\n
Dispatches a collective operation specified against the device. If grouped with other collectives in a stream.cmd.concurrent
region the collective operations may fuse and execute more efficiently.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription op
::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification resource_accesses
::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_9","title":"Operands:","text":"Operand Description channel
a collective communication channel element_count
index param
32-bit signless integer resources
variadic of resource or external resource or transient resource or variable resource or constant resource resource_sizes
variadic of index resource_offsets
variadic of index resource_lengths
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdconcurrent-streamcmdconcurrentop","title":"stream.cmd.concurrent
(Stream::CmdConcurrentOp)","text":"Executes all ops concurrently
Syntax:
operation ::= `stream.cmd.concurrent` $body\n attr-dict-with-keyword\n
Represents a wave of work scheduled concurrently (each op executing at the same time).
Waves can be nested to create a DAG. For example, take the following graph:
|\n v---------+---------v\n+-------|-------+ +-------|-------+\n| v--+--v | | v--+--v |\n| +----+ +----+ | | +----+ +----+ |\n| | @a | | @b | | | | @c | | @d | |\n| +----+ +----+ | | +----+ +----+ |\n| +--v--+ | | +--v--+ |\n+-------|-------+ +-------|-------+\n +---------v---------+\n |\n
Represented with nested waves:
stream.cmd.concurrent {\n stream.cmd.concurrent {\n stream.cmd.dispatch @a\n stream.cmd.dispatch @b\n }\n stream.cmd.concurrent {\n stream.cmd.dispatch @c\n stream.cmd.dispatch @d\n }\n }\n
Traits: HasParent, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_CmdPhaseOp
Interfaces: RegionBranchOpInterface, Stream_StreamableOp
"},{"location":"reference/mlir-dialects/Stream/#streamcmdcopy-streamcmdcopyop","title":"stream.cmd.copy
(Stream::CmdCopyOp)","text":"Copies a subview of a stream resource to another
Syntax:
operation ::= `stream.cmd.copy` $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Copies a subview of a resource into a subview of another. As with memcpy this does not support overlapping updates into the same resource.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_10","title":"Operands:","text":"Operand Description source
any stream-compatible type source_size
index source_offset
index target
any stream-compatible type target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmddiscard-streamcmddiscardop","title":"stream.cmd.discard
(Stream::CmdDiscardOp)","text":"Discards a subview of a resource
Syntax:
operation ::= `stream.cmd.discard` $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Discards a subview of a resource, indicating that after this command the specified contents are no longer needed. This can be used to trim memory or invalidate caches.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_11","title":"Operands:","text":"Operand Description target
any stream-compatible type target_size
index target_offset
index target_length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmddispatch-streamcmddispatchop","title":"stream.cmd.dispatch
(Stream::CmdDispatchOp)","text":"Dispatches a parallelized grid of work
Syntax:
operation ::= `stream.cmd.dispatch` custom<DispatchEntryPoints>($entry_points)\n (`[` $workload^ `]`)? ``\n (`(` $uniform_operands^ `:` type($uniform_operands) `)`)? `{`\n custom<DispatchResources>($resources, type($resources), $resource_sizes,\n $resource_offsets, $resource_lengths,\n $resource_accesses)\n `\\n` `}`\n attr-dict-with-keyword\n
Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, SymbolUserOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_points
::mlir::ArrayAttrsymbol ref array attribute resource_accesses
::mlir::ArrayAttraccess array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_12","title":"Operands:","text":"Operand Description workload
variadic of index uniform_operands
variadic of index or integer or floating-point or complex-type resources
variadic of resource or external resource or transient resource or variable resource or constant resource resource_sizes
variadic of index resource_offsets
variadic of index resource_lengths
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamcmdexecute-streamcmdexecuteop","title":"stream.cmd.execute
(Stream::CmdExecuteOp)","text":"Executes a dependency-aware sequence of streamable ops
Syntax:
operation ::= `stream.cmd.execute` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `with` ``\n custom<ExplicitResourceRegion>($resource_operands,\n type($resource_operands), $resource_operand_sizes,\n $body)\n `=` `` `>` type($result_timepoint)\n attr-dict-with-keyword\n
Evaluates the operations within the region by dependency order while obeying ties when present. Nested ops execute serially in block order and nested stream.cmd.concurrent
ops can be used to run multiple ops concurrently within the stream. All resource inputs must be captured explicitly. All results are only ready once all nested ops complete execution and the returned timepoint is reached. Zero or more timepoints may be provided to block execution until they are all reached; zero timepoints indicates that execution may begin immediately.
Traits: AttrSizedOperandSegments, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_CmdPhaseOp
Interfaces: ClosureOpInterface, InferTypeOpInterface, RegionBranchOpInterface, Stream_AffinityOp, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_13","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_10","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamcmdfill-streamcmdfillop","title":"stream.cmd.fill
(Stream::CmdFillOp)","text":"Fills a subview of a stream resource with a value
Syntax:
operation ::= `stream.cmd.fill` $value `,`\n $target `[` $target_offset `for` $target_length `]` `:`\n type($value) `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Splats a value into a subview of the given stream resource and returns the resource with the update applied.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_14","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_length
index value
8-bit signless integer or 16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#streamcmdflush-streamcmdflushop","title":"stream.cmd.flush
(Stream::CmdFlushOp)","text":"Flushes a subview of a resource
Syntax:
operation ::= `stream.cmd.flush` (`to` `(` $source_affinity^ `)`)?\n $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Transfers a resource to an external target. The resource memory is made available to the target and can be made visible there using stream.cmd.invalidate
.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription source_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_15","title":"Operands:","text":"Operand Description target
any stream-compatible type target_size
index target_offset
index target_length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmdfunc-streamcmdfuncop","title":"stream.cmd.func
(Stream::CmdFuncOp)","text":"Streamable function declaration
Syntax:
operation ::= `stream.cmd.func` custom<SymbolVisibility>($sym_visibility)\n $sym_name ``\n custom<DispatchFunctionSignature>($function_type,\n $arg_attrs,\n $res_attrs)\n attr-dict-with-keyword\n ($body^)?\n
Declares a function that can be called as an asynchronous streaming operation via stream.cmd.call
. Today only external functions are allowed.
Traits: IsolatedFromAbove, Stream_CmdPhaseOp
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol
"},{"location":"reference/mlir-dialects/Stream/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type sym_visibility
::mlir::StringAttrstring attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Stream/#streamcmdinvalidate-streamcmdinvalidateop","title":"stream.cmd.invalidate
(Stream::CmdInvalidateOp)","text":"Invalidates a subview of a resource
Syntax:
operation ::= `stream.cmd.invalidate` (`from` `(` $source_affinity^ `)`)?\n $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Transfers a resource from an external source into the current target. The resource memory is assumed to have been made available at the source via stream.cmd.flush
.
Traits: Stream_CmdPhaseOp
Interfaces: Stream_StreamableOp, Stream_SubviewEffectOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription source_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_16","title":"Operands:","text":"Operand Description target
any stream-compatible type target_size
index target_offset
index target_length
index"},{"location":"reference/mlir-dialects/Stream/#streamcmdserial-streamcmdserialop","title":"stream.cmd.serial
(Stream::CmdSerialOp)","text":"Executes all ops serially (in-order)
Syntax:
operation ::= `stream.cmd.serial` $body\n attr-dict-with-keyword\n
Represents a sequence of work scheduled serially (each op executing one after the other).
Regions can be nested to create a DAG. For example, take the following graph:
|\n v---------+-----v\n+-------|-------+ +---|----+\n| v--+--v | | v |\n| +----+ +----+ | | +----+ |\n| | @a | | @b | | | | @c | |\n| +----+ +----+ | | +----+ |\n| | | | | | |\n| | | | | +-v--+ |\n| | | | | | @d | |\n| | | | | +----+ |\n| +--v--+ | | | |\n+-------|-------+ +---|----+\n +---------v-----+\n |\n
Represented with nested regions:
stream.cmd.concurrent {\n stream.cmd.concurrent {\n stream.cmd.dispatch @a\n stream.cmd.dispatch @b\n }\n stream.cmd.serial {\n stream.cmd.dispatch @c\n stream.cmd.dispatch @d\n }\n }\n
Traits: HasParent, RecursiveMemoryEffects, SingleBlock, SingleBlockImplicitTerminator, Stream_CmdPhaseOp
Interfaces: RegionBranchOpInterface, Stream_StreamableOp
"},{"location":"reference/mlir-dialects/Stream/#file-ops","title":"File ops","text":"File ops.
"},{"location":"reference/mlir-dialects/Stream/#streamfileconstant-streamfileconstantop","title":"stream.file.constant
(Stream::FileConstantOp)","text":"Creates a file backed by the provided constant host memory
Syntax:
operation ::= `stream.file.constant` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `for` $source_length `]` `:`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Synchronously wraps a host heap buffer into a stream-accessible file handle. Changing the source buffer after definition has undefined behavior.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_17","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index source_length
index"},{"location":"reference/mlir-dialects/Stream/#results_11","title":"Results:","text":"Result Description result
a file handle used for I/O operations"},{"location":"reference/mlir-dialects/Stream/#streamfileread-streamfilereadop","title":"stream.file.read
(Stream::FileReadOp)","text":"Reads a segment of a file into a resource
Syntax:
operation ::= `stream.file.read` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`):(`:`)?\n $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `->`\n type($target) `` `{` $target_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously reads a segment of a file into a resource.
Some implementations can stream directly from the source file into device-local memory and file ops should be preferred to manually staging memory through host buffers.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_18","title":"Operands:","text":"Operand Description source
a file handle used for I/O operations source_offset
64-bit signless integer target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index length
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_12","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamfilewrite-streamfilewriteop","title":"stream.file.write
(Stream::FileWriteOp)","text":"Writes a segment of a file from a resource
Syntax:
operation ::= `stream.file.write` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`):(`:`)?\n $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n type($target)\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously writes a segment of a resource into a file. The file range must be valid within the file as this operation cannot grow the underlying file storage.
Some implementations can stream directly from device-local memory into the target file and file ops should be preferred to manually staging memory through host buffers.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_19","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index target
a file handle used for I/O operations target_offset
64-bit signless integer length
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_13","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#miscellaneous-ops","title":"Miscellaneous ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamreturn-streamreturnop","title":"stream.return
(Stream::ReturnOp)","text":"Returns results from a region
Syntax:
operation ::= `stream.return` attr-dict\n $operands `:` type($operands)\n
The values returned are copied by-value.
Traits: AlwaysSpeculatableImplTrait, HasParent, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_20","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamyield-streamyieldop","title":"stream.yield
(Stream::YieldOp)","text":"Yields stream values from an execution region
Syntax:
operation ::= `stream.yield` attr-dict\n ($resource_operands^ `:`\n custom<SizeAwareTypeList>(type($resource_operands),\n $resource_operand_sizes))?\n
The values returned represent the asynchronous value at the point in time the SSA value is defined (or tied).
Traits: AlwaysSpeculatableImplTrait, HasParent, SameVariadicOperandSize, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_21","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#pseudo-ops","title":"Pseudo Ops","text":"Pseudo ops for conversion support.
"},{"location":"reference/mlir-dialects/Stream/#streamtensorexport-streamtensorexportop","title":"stream.tensor.export
(Stream::TensorExportOp)","text":"Conversion placeholder for stream->other type conversion
Syntax:
operation ::= `stream.tensor.export` (`on` `(` $affinity^ `)`)?\n $source `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Defines a conversion to a higher-level dialect type such as tensor
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_22","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource or staging resource source_encoding_dims
variadic of index source_size
index"},{"location":"reference/mlir-dialects/Stream/#results_14","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Stream/#streamtensorimport-streamtensorimportop","title":"stream.tensor.import
(Stream::TensorImportOp)","text":"Conversion placeholder for other->stream type conversion
Syntax:
operation ::= `stream.tensor.import` (`on` `(` $affinity^ `)`)?\n $source `:`\n type($source)\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `{` $result_size `}`\n attr-dict-with-keyword\n
Defines a conversion from a higher-level dialect type such as tensor
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_23","title":"Operands:","text":"Operand Description source
any type result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_15","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#resource-ops","title":"Resource ops","text":"Generic resource ops.
"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloc-streamresourceallocop","title":"stream.resource.alloc
(Stream::ResourceAllocOp)","text":"Allocates a persistent resource
Syntax:
operation ::= `stream.resource.alloc` (`on` `(` $affinity^ `)`)?\n (`uninitialized` $uninitialized^)?\n attr-dict `:`\n type($result) `{` $storage_size `}`\n
Allocates a persistent value (one that is long-lived and possibly external to the program) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use discard
access.
Uninitialized allocations will have undefined contents and must only be used when all bytes are discarded prior to any reads. Runtimes decide what \"undefined contents\" means and here it only indicates that execution will be correct even if the memory starts with non-zero values.
If multiple values are allocated from the same operation it implies that they have matching lifetimes. When lowering to execution environments the separate allocations may be fused into one or more slab allocations in order to reduce overheads. How many allocations can be fused is based on the size of the individual resources and the target constraints (how large any single buffer may be, etc).
Traits: AlwaysSpeculatableImplTrait
Interfaces: AffinityOpInterface, ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription uninitialized
::mlir::UnitAttrunit attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_24","title":"Operands:","text":"Operand Description storage_size
index"},{"location":"reference/mlir-dialects/Stream/#results_16","title":"Results:","text":"Result Description result
any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcealloca-streamresourceallocaop","title":"stream.resource.alloca
(Stream::ResourceAllocaOp)","text":"Allocates a transient value with undefined contents
Syntax:
operation ::= `stream.resource.alloca` `uninitialized`\n (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`):(`:`)?\n attr-dict\n type($result) `{` $storage_size `}`\n `=` `` `>`\n type($result_timepoint)\n
Allocates a transient value (one that is short-lived and local to the current computation) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use discard
access.
The resource returned is not valid for use until the timepoint is reached; execution using this resource must await on the timepoint.
Traits: AlwaysSpeculatableImplTrait
Interfaces: AffinityOpInterface, ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_25","title":"Operands:","text":"Operand Description storage_size
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_17","title":"Results:","text":"Result Description result
any stream-compatible type result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceconstants-streamresourceconstantsop","title":"stream.resource.constants
(Stream::ResourceConstantsOp)","text":"Asynchronously uploads or maps constant values
Syntax:
operation ::= `stream.resource.constants` (`on` `(` $affinity^ `)`)?\n attr-dict `:`\n custom<ConstantValueList>(type($results),\n $result_sizes,\n $values)\n `\\n` ` ` ` ` `=` `` `>` type($result_timepoint)\n
Represents an upload of constant resources that may be packed, suballocated, and mapped depending on the final lowering target.
In runtime environments where memory is shared between host and device this turns into a mapping operation that avoids additional memory allocation and copies. When memory cannot be shared an asynchronous stream will be created to allocate and copy all of the constant values.
Though this op returns a unique resource for each constant value it's expected that almost all end up aliasing into the same storage. The exact packing and number of storage resources that are needed are not known until lowering to a particular backend, though, so they are separate here for proper usage tracking.
Both constant and variable resources can be produced; a constant is immutable while a variable will be treated as a constant-value initializer for a mutable resource. By modeling these together it's not required that variable initializers first be allocated, copied to the target, and then copied into the variable storage if the target is capable of doing a direct upload or mapping.
Traits: AlwaysSpeculatableImplTrait, SameVariadicResultSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription values
::mlir::ArrayAttrconstant value array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_26","title":"Operands:","text":"Operand Description result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_18","title":"Results:","text":"Result Description results
variadic of constant resource or variable resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourcedealloca-streamresourcedeallocaop","title":"stream.resource.dealloca
(Stream::ResourceDeallocaOp)","text":"Frees a transient value when available
Syntax:
operation ::= `stream.resource.dealloca` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n $operand `:` type($operand) `{` $operand_size `}`\n `=` `` `>` type($result_timepoint)\n attr-dict\n
Deallocates a transient value (one that is short-lived and local to the current computation) previously allocated using stream.resource.alloca
.
The resource is considered live and valid until the provided timepoint is reached and the memory is only made available for future requests after the result timepoint is reached.
Interfaces: AffinityOpInterface, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_27","title":"Operands:","text":"Operand Description operand
any stream-compatible type operand_size
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_19","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamresourceload-streamresourceloadop","title":"stream.resource.load
(Stream::ResourceLoadOp)","text":"Loads a value from a staging resource
Syntax:
operation ::= `stream.resource.load` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Returns the element(s) at the given offset in the staging resource. The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be transferred.
Interfaces: Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#operands_28","title":"Operands:","text":"Operand Description source
staging resource source_size
index source_offset
index"},{"location":"reference/mlir-dialects/Stream/#results_20","title":"Results:","text":"Result Description result
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcepack-streamresourcepackop","title":"stream.resource.pack
(Stream::ResourcePackOp)","text":"Packs variable-sized slices into a single slab
Syntax:
operation ::= `stream.resource.pack` (`on` `(` $affinity^ `)`)?\n (`offset` `(` $offset^ `)`)?\n `slices` `(` `{`\n custom<PackSliceRanges>($lifetime_intervals,\n $dynamic_slice_sizes,\n type($packed_offsets))\n `}` `)`\n `:` type($total_length)\n attr-dict-with-keyword\n
Performs a greedy packing of one or more sized slices with specified lifetimes and returns their relative offsets in an aliased linear space.
Slices are [start, end] = %slice_byte_size
, where the start and end values define an inclusive lifetime range and the size is the total number of bytes required to be live for that range.
// Computes the total length required for the packed values and the offsets\n// of the 3 slices requested relative to the base of the packed memory:\n%total_length, %offset_0, %offset_1, %offset_2 =\n stream.resource.pack\n // Each slice gets one result offset:\n slices({\n // 3 slices where A and B overlap and will get unique offsets\n // while B and C do not overlap and are allowed to alias.\n [0, 10] = %size_0, // A => %offset_0\n [3, 8] = %size_1, // B => %offset_1\n [9, 10] = %size_2, // C => %offset_2\n ...\n }) : index\n
The lifetime start and end points (inclusive) are only used for relative comparisons and may originate with any meaning (op order in block, epoch, phase of the moon, etc). The packing algorithm uses the intervals to determine slice liveness and when aliasing is safe.
The size of each slice may either be a constant or runtime-computed dynamic value. Constant slices can achieve more dense packing than the dynamic values and CSE/canonicalization should be applied to ensure that as many of the dynamic values are equivalent if possible.
The total length required to pack all slices is returned and can be used to acquire storage. The individual slice offsets are 0-based and as such, if used directly as buffer offsets, may need additional offsetting. This can either be applied via the optional offset
operand or slicing of the underlying allocation buffer.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription lifetime_intervals
::mlir::ArrayAttrindex array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_29","title":"Operands:","text":"Operand Description offset
index dynamic_slice_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_21","title":"Results:","text":"Result Description total_length
index packed_offsets
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamresourcesize-streamresourcesizeop","title":"stream.resource.size
(Stream::ResourceSizeOp)","text":"Returns the size of the resource storage in bytes
Syntax:
operation ::= `stream.resource.size` (`on` `(` $affinity^ `)`)?\n $operand\n attr-dict `:` type($operand)\n
Returns a possibly runtime-dynamic byte size of the resource backing storage. This may differ from the logical storage size of a value based on the alignment requirements of the target as well as encoding of higher level values such as sparse tensor formats.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_30","title":"Operands:","text":"Operand Description operand
any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#results_22","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Stream/#streamresourcestore-streamresourcestoreop","title":"stream.resource.store
(Stream::ResourceStoreOp)","text":"Stores a value into a staging resource
Syntax:
operation ::= `stream.resource.store` $value `,`\n $target `[` $target_offset `]` `:`\n type($value)\n `->`\n type($target) `{` $target_size `}`\n attr-dict-with-keyword\n
The operation will complete synchronously against the resource though it may introduce a yield point if the staging resource needs to be acquired.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#operands_31","title":"Operands:","text":"Operand Description target
staging resource target_size
index target_offset
index value
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamresourcesubview-streamresourcesubviewop","title":"stream.resource.subview
(Stream::ResourceSubviewOp)","text":"Slices out a cloned subview of a value
Syntax:
operation ::= `stream.resource.subview` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Aliases a byte subrange of a resource.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), StreamableOpInterface, TiedOpInterface, Util_SizeAwareOp, Util_SubrangeOp, ViewLikeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_32","title":"Operands:","text":"Operand Description source
any stream-compatible type source_size
index source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_23","title":"Results:","text":"Result Description result
any stream-compatible type"},{"location":"reference/mlir-dialects/Stream/#streamresourcetry_map-streamresourcetrymapop","title":"stream.resource.try_map
(Stream::ResourceTryMapOp)","text":"Maps read-only memory into a resource
Syntax:
operation ::= `stream.resource.try_map` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `]` `:`\n type($source)\n `->`\n type($did_map) `,` type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Synchronously maps a host heap buffer into a stream-accessible resource with the requested lifetime. If the given source cannot be mapped the did_map
result will be 0 and users must find another route into memory (such as file I/O). The resulting resource is not coherent with the source and behavior is undefined if the underlying contents change.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_33","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_24","title":"Results:","text":"Result Description did_map
1-bit signless integer result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#resource-parameter-io-ops","title":"Resource parameter I/O ops","text":"Resource parameter I/O ops.
"},{"location":"reference/mlir-dialects/Stream/#streamparametergather-streamparametergatherop","title":"stream.parameter.gather
(Stream::ParameterGatherOp)","text":"Gathers multiple resources from a parameter scope
Syntax:
operation ::= `stream.parameter.gather` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `{`\n custom<ParameterGatherOperations>(\n $source_scope, $source_keys, $source_offsets,\n $target, type($target), $target_size, $target_offsets, $target_lengths)\n `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously gathers one or more resources into a single target stream resource. This is equivalent to one stream.parameter.read
per parameter but allows implementations that can batch operations to do so without additional timeline overhead.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_keys
::mlir::ArrayAttrstring array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_34","title":"Operands:","text":"Operand Description source_offsets
variadic of 64-bit signless integer target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offsets
variadic of index target_lengths
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_25","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterload-streamparameterloadop","title":"stream.parameter.load
(Stream::ParameterLoadOp)","text":"Reads a resource from a parameter scope
Syntax:
operation ::= `stream.parameter.load` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n custom<ParameterReference>($source_scope, $source_key)\n `` `[` $source_offset `]` `:`\n type($result) `` `{` $result_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously reads a resource from an external parameter provider and returns the resulting stream resource. Depending on the resource type this may alias existing cached storage or be directly mapped to the parameter origin or result in a copy as if stream.resource.alloca
and stream.parameter.read
had been used.
Traits: AlwaysSpeculatableImplTrait, Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_TimelineOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_key
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_35","title":"Operands:","text":"Operand Description source_offset
64-bit signless integer result_size
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_26","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterread-streamparameterreadop","title":"stream.parameter.read
(Stream::ParameterReadOp)","text":"Reads a resource from a parameter scope
Syntax:
operation ::= `stream.parameter.read` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n custom<ParameterReference>($source_scope, $source_key)\n `` `[` $source_offset `]` `->`\n $target `[` $target_offset `for` $target_length `]` `:`\n type($target) `` `{` $target_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously reads a resource from an external parameter provider into the provided target resource range.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription source_scope
::mlir::StringAttrstring attribute source_key
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_36","title":"Operands:","text":"Operand Description source_offset
64-bit signless integer target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_length
index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_27","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterscatter-streamparameterscatterop","title":"stream.parameter.scatter
(Stream::ParameterScatterOp)","text":"Scatters multiple resources to a parameter scope
Syntax:
operation ::= `stream.parameter.scatter` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n `{`\n custom<ParameterScatterOperations>(\n $source, type($source), $source_size, $source_offsets, $source_lengths,\n $target_scope, $target_keys, $target_offsets)\n `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously scatters one or more resources from a single source resource into one or more parameters. This is equivalent to one stream.parameter.write
per parameter but allows implementations that can batch operations to do so without additional overhead.
Traits: AttrSizedOperandSegments, Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription target_scope
::mlir::StringAttrstring attribute target_keys
::mlir::ArrayAttrstring array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_37","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offsets
variadic of index source_lengths
variadic of index target_offsets
variadic of 64-bit signless integer await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_28","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamparameterwrite-streamparameterwriteop","title":"stream.parameter.write
(Stream::ParameterWriteOp)","text":"Writes a resource to a parameter scope
Syntax:
operation ::= `stream.parameter.write` (`on` `(` $affinity^ `)`)?\n (`await` `(` $await_timepoint^ `)` `=` `` `>`)?\n $source `[` $source_offset `for` $source_length `]` `:`\n type($source) `` `{` $source_size `}` `->`\n custom<ParameterReference>($target_scope, $target_key)\n `` `[` $target_offset `]`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Asynchronously writes a resource to an external parameter provider from the provided source resource range.
Traits: Stream_CmdPhaseOp
Interfaces: AffinityOpInterface, InferTypeOpInterface, Stream_TimelineOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription target_scope
::mlir::StringAttrstring attribute target_key
::mlir::StringAttrstring attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_38","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_length
index target_offset
64-bit signless integer await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_29","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#resource-transfer-ops","title":"Resource transfer ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamasyncalloca-streamasyncallocaop","title":"stream.async.alloca
(Stream::AsyncAllocaOp)","text":"Allocates a transient value with undefined contents
Syntax:
operation ::= `stream.async.alloca` (`on` `(` $affinity^ `)`)?\n attr-dict `:` type($result) `{` $storage_size `}`\n
Allocates a transient value (one that is short-lived and local to the current computation) with undefined contents. Consumers of the allocated result must assume nothing of the contents and use discard
access.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: AffinityOpInterface, ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), StreamableOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Stream/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_39","title":"Operands:","text":"Operand Description storage_size
index"},{"location":"reference/mlir-dialects/Stream/#results_30","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncclone-streamasynccloneop","title":"stream.async.clone
(Stream::AsyncCloneOp)","text":"Clones the contents of a value
Syntax:
operation ::= `stream.async.clone` (`on` `(` $affinity^ `)`)?\n $source `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. Acts as a copy-on-write operation.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_40","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_31","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccollective-streamasynccollectiveop","title":"stream.async.collective
(Stream::AsyncCollectiveOp)","text":"Performs a collective operation
Syntax:
operation ::= `stream.async.collective` `` $op `` `[` $element_count `]`\n (`on` `(` $affinity^ `)`)?\n `channel` `(` $channel `)`\n custom<CollectiveParam>(ref($op), $param) ``\n $source `[` $source_offset `to` $source_end `for` $source_length `]` `,`\n $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n type($source) `` `{` $source_size `}` `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
TODO: document different usage. For now this should be considered a prototype and that modeling of collective operations may change in the future to better ensure in-place operations (where send/recv is a subset of recv/send). We may have dedicated operations for the send and recv verbs as they have sequencing implications - or we could add optional sequencing to this base op.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription op
::mlir::iree_compiler::IREE::Stream::CollectiveAttrcollective operation and specification affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_41","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index target_length
index source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_end
index source_length
index element_count
index channel
a collective communication channel param
32-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_32","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncconstant-streamasyncconstantop","title":"stream.async.constant
(Stream::AsyncConstantOp)","text":"Defines a constant resource
Syntax:
operation ::= `stream.async.constant` (`on` `(` $affinity^ `)`)?\n `:`\n type($result) `` `{` $result_size `}`\n `=`\n $value\n attr-dict-with-keyword\n
Returns a new resource with the given constant value.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::Attributeany attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_42","title":"Operands:","text":"Operand Description result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_33","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasynccopy-streamasynccopyop","title":"stream.async.copy
(Stream::AsyncCopyOp)","text":"Copies a subview of a stream resource to another
Syntax:
operation ::= `stream.async.copy` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `to` $source_end `]` `,`\n $target `[` $target_offset `to` $target_end `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Copies a subview of a resource into a subview of another. As with memcpy this does not support overlapping updates into the same resource. Unlike stream.async.update
copy sources cannot be allocated in-place.
Equivalent to a stream.async.slice + stream.async.update.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_43","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_end
index length
index"},{"location":"reference/mlir-dialects/Stream/#results_34","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncdispatch-streamasyncdispatchop","title":"stream.async.dispatch
(Stream::AsyncDispatchOp)","text":"Dispatches a parallelized grid of work
Syntax:
operation ::= `stream.async.dispatch` (`on` `(` $affinity^ `)`)?\n custom<DispatchEntryPoints>($entry_points)\n (`[` $workload^ `]`)? ``\n custom<DispatchOperands>($resource_operands,\n $resource_operand_offsets,\n $resource_operand_ends,\n $resource_operand_lengths) attr-dict `:`\n custom<ShapedFunctionType>(ref($resource_operands),\n type($resource_operands), $resource_operand_sizes,\n type($results), $result_sizes,\n $tied_operands)\n
Calls the specified entry point function once for each element in the specified workgroup count. Each workgroup has access to the same operands and results and is able to load/store at will.
Traits: AttrSizedOperandSegments, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, Stream_StreamableOp, SymbolUserOpInterface, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription entry_points
::mlir::ArrayAttrsymbol ref array attribute tied_operands
::mlir::ArrayAttr64-bit integer array attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_44","title":"Operands:","text":"Operand Description workload
variadic of index resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or index or integer or floating-point or complex-type resource_operand_sizes
variadic of index resource_operand_offsets
variadic of index resource_operand_ends
variadic of index resource_operand_lengths
variadic of index result_sizes
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_35","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncfill-streamasyncfillop","title":"stream.async.fill
(Stream::AsyncFillOp)","text":"Fills a subview of a stream resource with a value
Syntax:
operation ::= `stream.async.fill` (`on` `(` $affinity^ `)`)?\n $value `,`\n $target `[` $target_offset `to` $target_end `for` $target_length `]` `:`\n type($value) `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Splats a value into a subview of the given stream resource and returns the resource with the update applied.
Equivalent to a stream.async.splat + stream.async.update.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_45","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index target_length
index value
8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer"},{"location":"reference/mlir-dialects/Stream/#results_36","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncload-streamasyncloadop","title":"stream.async.load
(Stream::AsyncLoadOp)","text":"Loads a value from a resource
Syntax:
operation ::= `stream.async.load` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Returns the element at the given location from within the resource.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_46","title":"Operands:","text":"Operand Description source
staging resource source_size
index source_offset
index"},{"location":"reference/mlir-dialects/Stream/#results_37","title":"Results:","text":"Result Description result
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamasyncslice-streamasyncsliceop","title":"stream.async.slice
(Stream::AsyncSliceOp)","text":"Slices out a cloned subview of a value
Syntax:
operation ::= `stream.async.slice` (`on` `(` $affinity^ `)`)?\n $source `[` $source_offset `to` $source_end `]` `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Slices a subrange of a stream resource based on a byte range. Acts as a copy-on-write operation.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_StreamableOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_47","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_size
index source_offset
index source_end
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_38","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncsplat-streamasyncsplatop","title":"stream.async.splat
(Stream::AsyncSplatOp)","text":"Splats a value into a resource
Syntax:
operation ::= `stream.async.splat` (`on` `(` $affinity^ `)`)?\n $value `:` type($value) `->` type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a new resource with the given primitive value splatted out to fill the entire contents.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_40","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_48","title":"Operands:","text":"Operand Description value
8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_39","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncstore-streamasyncstoreop","title":"stream.async.store
(Stream::AsyncStoreOp)","text":"Stores a value into a resource
Syntax:
operation ::= `stream.async.store` $value `,`\n $target `[` $target_offset `]` `:`\n type($value)\n `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Returns a resource with the element at the given offset set to the given value.
Traits: AlwaysSpeculatableImplTrait, Stream_AsyncPhaseOp
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_49","title":"Operands:","text":"Operand Description target
staging resource target_size
index target_offset
index value
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_40","title":"Results:","text":"Result Description result
staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasynctransfer-streamasynctransferop","title":"stream.async.transfer
(Stream::AsyncTransferOp)","text":"Transfers a resource from one location/state to another
Syntax:
operation ::= `stream.async.transfer` (`from` `(` $source_affinity^ `)`)?\n $source `:`\n type($source) `` `{` $source_size `}` `->`\n (`to` `(` $result_affinity^ `)`)?\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Transfers a resource between different states (such as a staging
lifetime to a local
lifetime) or different affinities. This is roughly equivalent to a cast but may have special semantics when later lowered to one or more devices with discrete memory spaces or pools.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, Stream_AffinityOp, Stream_StreamableOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_41","title":"Attributes:","text":"AttributeMLIR TypeDescription source_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity result_affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_50","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource or staging resource source_size
index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_41","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamasyncupdate-streamasyncupdateop","title":"stream.async.update
(Stream::AsyncUpdateOp)","text":"Updates a slice of a subview of a resource in-place
Syntax:
operation ::= `stream.async.update` (`on` `(` $affinity^ `)`)?\n $update `,`\n $target `[` $target_offset `to` $target_end `]` `:`\n type($update) `` `{` $update_size `}` `->`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Copies a value into a resource based on a byte range. The returned value is the entire updated target value. Updates can be turned into placement allocations and avoid copies.
Traits: Stream_AsyncPhaseOp
Interfaces: AsyncAccessOpInterface, InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_42","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_51","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_size
index target_offset
index target_end
index update
resource or external resource or transient resource or variable resource or constant resource update_size
index"},{"location":"reference/mlir-dialects/Stream/#results_42","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#synchronization-ops","title":"Synchronization ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtimepointawait-streamtimepointawaitop","title":"stream.timepoint.await
(Stream::TimepointAwaitOp)","text":"Awaits a timepoint before returning a set of resources
Syntax:
operation ::= `stream.timepoint.await` (`on` `(` $affinity^ `)`)?\n $await_timepoint `=` `` `>`\n $resource_operands `:`\n custom<SizeAwareTypeList>(type($resource_operands),\n type($results), $resource_operand_sizes)\n attr-dict-with-keyword\n
After asynchronous execution scheduling resources may exist in different states at different points in the execution timeline. This op enables resolving the version of a resource after a particular point in the timeline. As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_TimelineOp, TiedOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_43","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_52","title":"Operands:","text":"Operand Description resource_operands
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource resource_operand_sizes
variadic of index await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_43","title":"Results:","text":"Result Description results
variadic of resource or external resource or transient resource or variable resource or constant resource or staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtimepointbarrier-streamtimepointbarrierop","title":"stream.timepoint.barrier
(Stream::TimepointBarrierOp)","text":"Returns a timepoint indicating when a resource is available
Syntax:
operation ::= `stream.timepoint.barrier` (`on` `(` $affinity^ `)`)?\n $resource `:` type($resource) `` `{` $resource_size `}`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
After asynchronous execution scheduling resources may exist in different states at different points in the execution timeline. This op enables identifying when the version of a resource after a particular point in the timeline is available. As timepoints transitively chain the timepoint must only cover the resource availability but not be limited to its original production timepoint.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_TimelineOp, TiedOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_44","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_53","title":"Operands:","text":"Operand Description resource
resource or external resource or transient resource or variable resource or constant resource or staging resource resource_size
index"},{"location":"reference/mlir-dialects/Stream/#results_44","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource or staging resource result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointchain_external-streamtimepointchainexternalop","title":"stream.timepoint.chain_external
(Stream::TimepointChainExternalOp)","text":"Exports a timepoint to an external dialect type
Syntax:
operation ::= `stream.timepoint.chain_external` (`on` `(` $affinity^ `)`)?\n $await_timepoint\n `=` `` `>`\n `(` $external_values `:` type($external_values) `)`\n attr-dict-with-keyword\n
Defines a conversion to an external dialect type such as hal.fence
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Interfaces: Stream_AffinityOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_45","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_54","title":"Operands:","text":"Operand Description await_timepoint
a timepoint indicating execution availability external_values
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointexport-streamtimepointexportop","title":"stream.timepoint.export
(Stream::TimepointExportOp)","text":"Exports a timepoint to an external dialect type
Syntax:
operation ::= `stream.timepoint.export` (`on` `(` $affinity^ `)`)?\n $await_timepoint\n `=` `` `>`\n `(` type($results) `)`\n attr-dict-with-keyword\n
Defines a conversion to an external dialect type such as hal.fence
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_46","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_55","title":"Operands:","text":"Operand Description await_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_45","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimmediate-streamtimepointimmediateop","title":"stream.timepoint.immediate
(Stream::TimepointImmediateOp)","text":"Returns an immediately-available timepoint
Syntax:
operation ::= `stream.timepoint.immediate` attr-dict\n `=` `` `>` type($result_timepoint)\n
Timepoints indicate a point in the execution timeline and this op can be used to get a placeholder representing the start of the timeline. Any waits on the returned timepoint will resolve immediately. This generally folds away but can be useful if needing to initialize globals or branch args.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_TimelineOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#results_46","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointimport-streamtimepointimportop","title":"stream.timepoint.import
(Stream::TimepointImportOp)","text":"Imports a timepoint from an external dialect type
Syntax:
operation ::= `stream.timepoint.import` (`on` `(` $affinity^ `)`)?\n $operands `:` `(` type($operands) `)`\n `=` `` `>`\n type($result_timepoint)\n attr-dict-with-keyword\n
Defines a conversion from an external dialect type such as hal.semaphore
that is resolved during lowering into the stream dialect. This can be used to interoperate between levels of the stack that require specifying stream types and those that prior to lowering do not handle them.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_47","title":"Attributes:","text":"AttributeMLIR TypeDescription affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_56","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Stream/#results_47","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#streamtimepointjoin-streamtimepointjoinop","title":"stream.timepoint.join
(Stream::TimepointJoinOp)","text":"Joins one or more timepoints into the max of all of them
Syntax:
operation ::= `stream.timepoint.join` `max` `(` $await_timepoints `)` `=` `` `>` type($result_timepoint)\n attr-dict-with-keyword\n
Returns a timepoint that indicates that all of the input timepoints have been reached.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_TimelineOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#operands_57","title":"Operands:","text":"Operand Description await_timepoints
variadic of a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#results_48","title":"Results:","text":"Result Description result_timepoint
a timepoint indicating execution availability"},{"location":"reference/mlir-dialects/Stream/#tensor-ops","title":"Tensor ops","text":""},{"location":"reference/mlir-dialects/Stream/#streamtensorclone-streamtensorcloneop","title":"stream.tensor.clone
(Stream::TensorCloneOp)","text":"Clones the contents of a value
Syntax:
operation ::= `stream.tensor.clone` (`on` `(` $affinity^ `)`)?\n $source `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Clones the contents of a value at a snapshot in time. Future changes to the cloned value will not affect the result. Acts as a copy-on-write operation.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_StreamableOp, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_48","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_58","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_encoding_dims
variadic of index source_size
index result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_49","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorconstant-streamtensorconstantop","title":"stream.tensor.constant
(Stream::TensorConstantOp)","text":"Defines a constant tensor value
Syntax:
operation ::= `stream.tensor.constant` (`on` `(` $affinity^ `)`)?\n `:`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result)\n `=`\n $value\n attr-dict-with-keyword\n
Returns a typed resource initialized to the given constant value.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, Stream_StreamableOp, Util_ShapeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_49","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::Attributeany attribute result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_59","title":"Operands:","text":"Operand Description result_encoding_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_50","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorempty-streamtensoremptyop","title":"stream.tensor.empty
(Stream::TensorEmptyOp)","text":"Defines an empty tensor value
Syntax:
operation ::= `stream.tensor.empty` (`on` `(` $affinity^ `)`)?\n `:`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a typed resource initialized with no contents. This still carries shape metadata and may encode to a non-empty resource such as in cases where the empty representation still has data (e.g. sparse tensors). Subsequent writes must populate any ranges of the tensor that are later read.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Stream_AffinityOp, StreamableOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_50","title":"Attributes:","text":"AttributeMLIR TypeDescription result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_60","title":"Operands:","text":"Operand Description result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_51","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorfill-streamtensorfillop","title":"stream.tensor.fill
(Stream::TensorFillOp)","text":"Fills a subview of a stream resource with a value
Syntax:
operation ::= `stream.tensor.fill` (`on` `(` $affinity^ `)`)?\n $value `,` $target `[` $start_indices `for` $lengths `]` `:`\n type($value)\n `->`\n $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n `in`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Splats a value into a subview of the given stream resource and returns the resource with the update applied.
Equivalent to a stream.tensor.splat + stream.tensor.update.
Traits: AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_51","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_61","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_encoding_dims
variadic of index target_size
index start_indices
variadic of index lengths
variadic of index value
index or integer or floating-point or complex-type"},{"location":"reference/mlir-dialects/Stream/#results_52","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorload-streamtensorloadop","title":"stream.tensor.load
(Stream::TensorLoadOp)","text":"Loads a value from a tensor element
Syntax:
operation ::= `stream.tensor.load` $source (`[` $indices^ `]`)? `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n type($result)\n attr-dict-with-keyword\n
Returns the element at the given location from within the tensor.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_52","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_62","title":"Operands:","text":"Operand Description source
staging resource source_encoding_dims
variadic of index source_size
index indices
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_53","title":"Results:","text":"Result Description result
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#streamtensorsizeof-streamtensorsizeofop","title":"stream.tensor.sizeof
(Stream::TensorSizeOfOp)","text":"Calculates the storage size of a given high-level type
Syntax:
operation ::= `stream.tensor.sizeof` (`on` `(` $affinity^ `)`)?\n $encoding (`{` $encoding_dims^ `}`)?\n attr-dict `:` type($storage_size)\n
Target-dependent storage size calculation using a high-level annotated type. While within the stream dialect the storage size of a value is left as a placeholder using this op. The requisite target-specific parameters for expanding the size calculation are only available after affinities have been assigned.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_53","title":"Attributes:","text":"AttributeMLIR TypeDescription encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_63","title":"Operands:","text":"Operand Description encoding_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#results_54","title":"Results:","text":"Result Description storage_size
index"},{"location":"reference/mlir-dialects/Stream/#streamtensorslice-streamtensorsliceop","title":"stream.tensor.slice
(Stream::TensorSliceOp)","text":"Slices out a cloned subview of a value
Syntax:
operation ::= `stream.tensor.slice` (`on` `(` $affinity^ `)`)?\n $source `[` $start_indices `for` $lengths `]` `:`\n $source_encoding (`` `{` $source_encoding_dims^ `}`)?\n `in`\n type($source) `` `{` $source_size `}`\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Slices a subrange of a stream resource based on a tensor encoding. Acts as a copy-on-write operation.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, Stream_StreamableOp, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_54","title":"Attributes:","text":"AttributeMLIR TypeDescription source_encoding
::mlir::TypeAttrany type attribute result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_64","title":"Operands:","text":"Operand Description source
resource or external resource or transient resource or variable resource or constant resource source_encoding_dims
variadic of index source_size
index start_indices
variadic of index lengths
variadic of index result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_55","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorsplat-streamtensorsplatop","title":"stream.tensor.splat
(Stream::TensorSplatOp)","text":"Splats a value into a shaped tensor
Syntax:
operation ::= `stream.tensor.splat` (`on` `(` $affinity^ `)`)?\n $value\n `:` type($value)\n `->`\n $result_encoding (`` `{` $result_encoding_dims^ `}`)?\n `in`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a typed resource initialized to the given primitive value.
Traits: AlwaysSpeculatableImplTrait, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), Stream_AffinityOp, StreamableOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_55","title":"Attributes:","text":"AttributeMLIR TypeDescription result_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_65","title":"Operands:","text":"Operand Description value
index or integer or floating-point or complex-type result_encoding_dims
variadic of index result_size
index"},{"location":"reference/mlir-dialects/Stream/#results_56","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#streamtensorstore-streamtensorstoreop","title":"stream.tensor.store
(Stream::TensorStoreOp)","text":"Stores a value into a tensor element
Syntax:
operation ::= `stream.tensor.store` $value `,`\n $target (`[` $indices^ `]`)? `:`\n type($value)\n `->`\n $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n `in`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Returns a tensor with the element at the given index set to the given value.
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Stream/#attributes_56","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Stream/#operands_66","title":"Operands:","text":"Operand Description target
staging resource target_encoding_dims
variadic of index target_size
index indices
variadic of index value
index or integer or floating-point or complex-type or vector of any type values"},{"location":"reference/mlir-dialects/Stream/#results_57","title":"Results:","text":"Result Description result
staging resource"},{"location":"reference/mlir-dialects/Stream/#streamtensortrace-streamtensortraceop","title":"stream.tensor.trace
(Stream::TensorTraceOp)","text":"Traces one or more tensor values at runtime
Syntax:
operation ::= `stream.tensor.trace` $key `=` `[`\n custom<EncodedResourceOperands>(\n $resources, type($resources), $resource_sizes,\n $resource_encodings, $resource_encoding_dims)\n `]` attr-dict-with-keyword\n
Traces out to a runtime trace sink (console, log file, etc) the given tensors. The key is arbitrary and can be used for identifying the set of values being traced.
Traits: AttrSizedOperandSegments
Interfaces: ShapeAwareOpInterface, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_57","title":"Attributes:","text":"AttributeMLIR TypeDescription key
::mlir::StringAttrstring attribute resource_encodings
::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/Stream/#operands_67","title":"Operands:","text":"Operand Description resources
variadic of staging resource resource_sizes
variadic of index resource_encoding_dims
variadic of index"},{"location":"reference/mlir-dialects/Stream/#streamtensorupdate-streamtensorupdateop","title":"stream.tensor.update
(Stream::TensorUpdateOp)","text":"Updates a slice of a subview of a resource in-place
Syntax:
operation ::= `stream.tensor.update` (`on` `(` $affinity^ `)`)?\n $update `,` $target `[` $start_indices `]` `:`\n $update_encoding (`` `{` $update_encoding_dims^ `}`)?\n `in`\n type($update) `` `{` $update_size `}`\n `->`\n $target_encoding (`` `{` $target_encoding_dims^ `}`)?\n `in`\n custom<ShapedTiedResult>(type($target), $target_size)\n attr-dict-with-keyword\n
Copies a value into a resource based on tensor encodings. The returned value is the entire updated target value.
Traits: AttrSizedOperandSegments, Stream_TensorPhaseOp
Interfaces: InferTypeOpInterface, Stream_AffinityOp, Stream_StreamableOp, TiedOpInterface, Util_ShapeAwareOp, Util_SizeAwareOp
"},{"location":"reference/mlir-dialects/Stream/#attributes_58","title":"Attributes:","text":"AttributeMLIR TypeDescription target_encoding
::mlir::TypeAttrany type attribute update_encoding
::mlir::TypeAttrany type attribute affinity
::mlir::iree_compiler::IREE::Stream::AffinityAttrdefines execution context affinity"},{"location":"reference/mlir-dialects/Stream/#operands_68","title":"Operands:","text":"Operand Description target
resource or external resource or transient resource or variable resource or constant resource target_encoding_dims
variadic of index target_size
index start_indices
variadic of index update
resource or external resource or transient resource or variable resource or constant resource update_encoding_dims
variadic of index update_size
index"},{"location":"reference/mlir-dialects/Stream/#results_58","title":"Results:","text":"Result Description result
resource or external resource or transient resource or variable resource or constant resource"},{"location":"reference/mlir-dialects/Stream/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/Stream/#collectiveattr","title":"CollectiveAttr","text":"collective operation and specification
Syntax:
#stream.collective<\n CollectiveKind, # kind\n std::optional<CollectiveReductionOp>, # reduction\n CollectiveElementType # element_type\n>\n
Specifies the collective operation to perform and any mode bits required.
"},{"location":"reference/mlir-dialects/Stream/#parameters","title":"Parameters:","text":"Parameter C++ type Description kind CollectiveKind
reduction std::optional<CollectiveReductionOp>
element_type CollectiveElementType
"},{"location":"reference/mlir-dialects/Stream/#namedparameterattr","title":"NamedParameterAttr","text":"named parameter referenced by an optional scope and key
Syntax:
#stream.parameter.named<\n ::mlir::Type, # type\n StringAttr, # scope\n StringAttr, # key\n DictionaryAttr # config\n>\n
Specifies an externally-defined parameter that can be referenced by an optional scope defining a set of parameters and a key uniquely identifying the parameter within its scope.
"},{"location":"reference/mlir-dialects/Stream/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description type ::mlir::Type
scope StringAttr
key StringAttr
config DictionaryAttr
"},{"location":"reference/mlir-dialects/Stream/#partitioningconfigattr","title":"PartitioningConfigAttr","text":"defines partitioning configuration
Configures the partitioning algorithm to use and its configuration. Partitioning is useful to adjust when scheduling behavior of targets is radically different - such as single-threaded vs. multi-threaded CPUs or bespoke ML accelerators vs. general purpose GPUs. This mechanism controls the amount of concurrency, parallelism, memory consumption, and latency.
"},{"location":"reference/mlir-dialects/Stream/#parameters_2","title":"Parameters:","text":"Parameter C++ type Description favor IREE::Stream::FavorAttr
"},{"location":"reference/mlir-dialects/Stream/#resourceconfigattr","title":"ResourceConfigAttr","text":"defines resource constraints configuration
Defines resource storage constraints. These allow for packing and layout algorithms to ensure they are producing usable results on target devices.
"},{"location":"reference/mlir-dialects/Stream/#parameters_3","title":"Parameters:","text":"Parameter C++ type Description maxAllocationSize int64_t
minBufferOffsetAlignment int64_t
maxBufferRange int64_t
minBufferRangeAlignment int64_t
indexBits int64_t
aliasMutableBindings bool
memoryModel IREE::Stream::MemoryModel
"},{"location":"reference/mlir-dialects/Stream/#timepointattr","title":"TimepointAttr","text":"an immediately-resolved timepoint
"},{"location":"reference/mlir-dialects/Stream/#parameters_4","title":"Parameters:","text":"Parameter C++ type Description type ::mlir::Type
"},{"location":"reference/mlir-dialects/Stream/#type-constraint-definition","title":"Type constraint definition","text":""},{"location":"reference/mlir-dialects/Stream/#constant-resource","title":"constant resource","text":"Stream constants are immutable values that are available for the lifetime of the program once initialized.
"},{"location":"reference/mlir-dialects/Stream/#external-resource","title":"external resource","text":"Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#staging-resource","title":"staging resource","text":"Stream upload/download staging resource. These are used outside of streams and then transferred to other stream resources such as variables or transients for use inside of streams. Dispatches and several other operations cannot directly operate on these resources.
"},{"location":"reference/mlir-dialects/Stream/#transient-resource","title":"transient resource","text":"Stream transients represent asynchronously-available and sequenced values that have a short lifetime - often only passed between stream executions. It is expected that transient values are not stored in global state and have minimal lifetime as they may be heavily pooled or suballocated.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#resource","title":"resource","text":"A stream resource that has not yet had its lifetime calculated.
"},{"location":"reference/mlir-dialects/Stream/#variable-resource","title":"variable resource","text":"Stream variables represent asynchronously-available and sequenced values that have a long lifetime relative to the work being performed on them. These variables are often stored in global state and may live for the entire duration of the program.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/Stream/#bindingtype","title":"BindingType","text":"a managed resource binding into an executable scope
Syntax: !stream.binding
A resource binding available within an executable dispatch function. The bindings map 1:1 with the resources bound during dispatch operations.
"},{"location":"reference/mlir-dialects/Stream/#channeltype","title":"ChannelType","text":"a collective communication channel
Syntax: !stream.channel
Represents a single participant in a collective clique. Multiple channels may exist within the same program to allow for partial operations or hierarchical operations.
In programs that model SPMD behavior internally channels can be created or provided by hosting applications. For example, the program could expose a @set_channels(!util.list<!stream.channel>)
method that stores the channels in globals for use throughout the program allowing for application-controlled channel configuration.
"},{"location":"reference/mlir-dialects/Stream/#filetype","title":"FileType","text":"a file handle used for I/O operations
Syntax: !stream.file
A file handle that can be asynchronously read and written into/from stream resources.
"},{"location":"reference/mlir-dialects/Stream/#resourcetype","title":"ResourceType","text":"a managed resource
Stream external values represent asynchronously-available and sequenced values that are owned and managed by external code - such as those passed in or out of the program entry points. Though external values are managed during an invocation the same as other stream values the visibility into them does not extend outside of the invocation they are provided to.
Stream values are not usable directly outside of a stream execution or transfer operation. If the contents of the value are needed they must first be transferred via stream.transfer
- which may incur a copy.
"},{"location":"reference/mlir-dialects/Stream/#parameters_5","title":"Parameters:","text":"Parameter C++ type Description lifetime IREE::Stream::Lifetime
"},{"location":"reference/mlir-dialects/Stream/#timepointtype","title":"TimepointType","text":"a timepoint indicating execution availability
Syntax: !stream.timepoint
Represents a point in the execution timeline that when resolved indicates that all of the execution prior to this timepoint has completed and the results of the execution are available for use. This includes transitive dependencies as well; if timepoint B is dependent on timepoint A then when B is available so too must be A.
"},{"location":"reference/mlir-dialects/Util/","title":"Util","text":""},{"location":"reference/mlir-dialects/Util/#util-dialect","title":"'util' Dialect","text":"A dialect used for types common across IREE subdialects.
- 'util' Dialect
- Operation definition
- Address/offset arithmetic ops
- util.align (Util::AlignOp)
- util.sizeof (Util::SizeOfOp)
- Buffer ops
- util.buffer.alloc (Util::BufferAllocOp)
- util.buffer.compare (Util::BufferCompareOp)
- util.buffer.constant (Util::BufferConstantOp)
- util.buffer.copy (Util::BufferCopyOp)
- util.buffer.dealloc (Util::BufferDeallocOp)
- util.buffer.fill (Util::BufferFillOp)
- util.buffer.load (Util::BufferLoadOp)
- util.buffer.size (Util::BufferSizeOp)
- util.buffer.slice (Util::BufferSliceOp)
- util.buffer.storage (Util::BufferStorageOp)
- util.buffer.store (Util::BufferStoreOp)
- util.buffer.subspan (Util::BufferSubspanOp)
- Compiler hint ops
- util.optimization_barrier (Util::OptimizationBarrierOp)
- util.unfoldable_constant (Util::UnfoldableConstantOp)
- util.unreachable (Util::UnreachableOp)
- Data type conversion ops
- util.numeric.optional_narrow (Util::NumericOptionalNarrowOp)
- Global ops
- util.global.address (Util::GlobalAddressOp)
- util.global.load.indirect (Util::GlobalLoadIndirectOp)
- util.global.load (Util::GlobalLoadOp)
- util.global (Util::GlobalOp)
- util.global.store.indirect (Util::GlobalStoreIndirectOp)
- util.global.store (Util::GlobalStoreOp)
- List ops
- util.list.create (Util::ListCreateOp)
- util.list.get (Util::ListGetOp)
- util.list.resize (Util::ListResizeOp)
- util.list.set (Util::ListSetOp)
- util.list.size (Util::ListSizeOp)
- Range arithmetic ops
- util.range.extents (Util::RangeExtentsOp)
- util.range.max (Util::RangeMaxOp)
- util.range.min (Util::RangeMinOp)
- Status ops
- util.status.check_ok (Util::StatusCheckOkOp)
- Structural ops
- util.initializer (Util::InitializerOp)
- util.initializer.return (Util::InitializerReturnOp)
- Type manipulation ops
- util.cast (Util::CastOp)
- util.cmp.eq (Util::CmpEQOp)
- util.null (Util::NullOp)
- Value utility ops
- util.switch (Util::SwitchOp)
- Type definition
- BufferType
- ListType
- ObjectType
- PtrType
- VariantType
"},{"location":"reference/mlir-dialects/Util/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/Util/#addressoffset-arithmetic-ops","title":"Address/offset arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilalign-utilalignop","title":"util.align
(Util::AlignOp)","text":"Aligns up to a power-of-two alignment if required
Syntax:
operation ::= `util.align` $value `,` $alignment attr-dict `:` type($result)\n
Aligns |value| up to the given power-of-two |alignment| if required.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands","title":"Operands:","text":"Operand Description value
signless-integer-like alignment
signless-integer-like"},{"location":"reference/mlir-dialects/Util/#results","title":"Results:","text":"Result Description result
signless-integer-like"},{"location":"reference/mlir-dialects/Util/#utilsizeof-utilsizeofop","title":"util.sizeof
(Util::SizeOfOp)","text":"Returns the size in bytes of a datatype
Syntax:
operation ::= `util.sizeof` $sizedType attr-dict-with-keyword\n
Most datatypes have a static size at all layers of the compilation stack. However, those that only have a size for certain lowering flows can be challenging. This op represents such sizes in a way that can be specialized later.
Returns the size in bytes, rounded up to the next whole byte of the specified type. This op will fold to a constant index value for IntegerType and FloatType. All others are not folded.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription sizedType
::mlir::TypeAttrany type attribute"},{"location":"reference/mlir-dialects/Util/#results_1","title":"Results:","text":"Result Description size
index"},{"location":"reference/mlir-dialects/Util/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/Util/#utilbufferalloc-utilbufferallocop","title":"util.buffer.alloc
(Util::BufferAllocOp)","text":"Allocates a buffer with undefined contents
Syntax:
operation ::= `util.buffer.alloc` `uninitialized`\n attr-dict\n `:`\n type($result) `` `{` $storage_size `}`\n
Allocates a buffer with undefined contents. Consumers of the allocated result must assume nothing of the contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription alignment
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_1","title":"Operands:","text":"Operand Description storage_size
index"},{"location":"reference/mlir-dialects/Util/#results_2","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercompare-utilbuffercompareop","title":"util.buffer.compare
(Util::BufferCompareOp)","text":"Compares a range of two buffers
Syntax:
operation ::= `util.buffer.compare` $lhs `[` $lhs_offset `]` `,`\n $rhs `[` $rhs_offset `]` `,`\n $length `:`\n type($lhs) `` `{` $lhs_size `}` `,`\n type($rhs) `` `{` $rhs_size `}`\n attr-dict-with-keyword\n
Returns true if the two ranges are bitwise equivalent, somewhat like memcmp.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_2","title":"Operands:","text":"Operand Description lhs
a reference counted byte buffer lhs_size
index lhs_offset
index rhs
a reference counted byte buffer rhs_size
index rhs_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#results_3","title":"Results:","text":"Result Description result
1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilbufferconstant-utilbufferconstantop","title":"util.buffer.constant
(Util::BufferConstantOp)","text":"Constant host-side byte buffer
Syntax:
operation ::= `util.buffer.constant` ($name^)? attr-dict `:` type($result) `=` $value\n
Defines a compile-time byte buffer based on the given attribute value. The attribute will be serialized into the canonical IREE format for the chosen host target.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription name
::mlir::StringAttrstring attribute value
::mlir::Attributebuffer-like constant attribute values alignment
::mlir::IntegerAttrindex attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#results_4","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbuffercopy-utilbuffercopyop","title":"util.buffer.copy
(Util::BufferCopyOp)","text":"Copies a range of bytes between buffers
Syntax:
operation ::= `util.buffer.copy` $source `[` $source_offset `]` `,`\n $target `[` $target_offset `]` `,`\n $length `:`\n type($source) `` `{` $source_size `}` `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Copies a range of bytes as with memcpy (no overlapping).
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_3","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index target
a reference counted byte buffer target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#utilbufferdealloc-utilbufferdeallocop","title":"util.buffer.dealloc
(Util::BufferDeallocOp)","text":"Deallocates a buffer
Syntax:
operation ::= `util.buffer.dealloc` $operand `:` type($operand) `{` $operand_size `}`\n attr-dict-with-keyword\n
Hints that the buffer contents can be discarded. Buffers are reference counted and other owners may keep it live beyond the dealloc.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Free on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_4","title":"Operands:","text":"Operand Description operand
a reference counted byte buffer operand_size
index"},{"location":"reference/mlir-dialects/Util/#utilbufferfill-utilbufferfillop","title":"util.buffer.fill
(Util::BufferFillOp)","text":"Fills a range of bytes with a value
Syntax:
operation ::= `util.buffer.fill` $pattern `,`\n $target `[` $target_offset `for` $length `]` `:`\n type($pattern) `->`\n type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Fills the contents of the buffer in the given byte range with a pattern. The offset and length must match the natural alignment of the pattern type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_5","title":"Operands:","text":"Operand Description pattern
integer or floating-point or index target
a reference counted byte buffer target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#utilbufferload-utilbufferloadop","title":"util.buffer.load
(Util::BufferLoadOp)","text":"Loads a value from a buffer
Syntax:
operation ::= `util.buffer.load` $source `[` $source_offset `for` $length `]`\n `:` type($source) `` `{` $source_size `}` `->` type($result)\n attr-dict-with-keyword\n
Loads a value at a byte offset. Must be aligned to the natural size of the result type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_6","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#results_5","title":"Results:","text":"Result Description result
index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#utilbuffersize-utilbuffersizeop","title":"util.buffer.size
(Util::BufferSizeOp)","text":"Returns the total buffer storage size in bytes
Syntax:
operation ::= `util.buffer.size` $operand\n `:` type($operand)\n attr-dict-with-keyword\n
Returns the total length of the buffer in bytes from its base offset.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_7","title":"Operands:","text":"Operand Description operand
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#results_6","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Util/#utilbufferslice-utilbuffersliceop","title":"util.buffer.slice
(Util::BufferSliceOp)","text":"Clones a subregion of a buffer
Syntax:
operation ::= `util.buffer.slice` $source `[` $source_offset `]` attr-dict `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n
Returns a copy of the contents from the source buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription alignment
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/Util/#operands_8","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Util/#results_7","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#utilbufferstorage-utilbufferstorageop","title":"util.buffer.storage
(Util::BufferStorageOp)","text":"Returns the underlying buffer storage range
Syntax:
operation ::= `util.buffer.storage` $operand\n `:` type($operand) `` `{` $operand_size `}` `->` `(` type($result) `,` type($offset) `)`\n attr-dict-with-keyword\n
Returns the buffer storage as a memref that must be offset and restricted to the returned range. The memref may be of any type and the user is responsible for ensuring that the reinterpret_cast-like behavior makes sense for the data they are accessing.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_9","title":"Operands:","text":"Operand Description operand
a reference counted byte buffer operand_size
index"},{"location":"reference/mlir-dialects/Util/#results_8","title":"Results:","text":"Result Description result
memref of any type values offset
index"},{"location":"reference/mlir-dialects/Util/#utilbufferstore-utilbufferstoreop","title":"util.buffer.store
(Util::BufferStoreOp)","text":"Stores a value into a buffer
Syntax:
operation ::= `util.buffer.store` $source `,`\n $target `[` $target_offset `for` $length `]`\n `:` type($source) `->` type($target) `` `{` $target_size `}`\n attr-dict-with-keyword\n
Stores a value at a byte offset. Must be aligned to the natural size of the source type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), SubrangeOperandOpInterface, Util_SizeAwareOp
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_10","title":"Operands:","text":"Operand Description source
index or integer or floating-point target
a reference counted byte buffer target_size
index target_offset
index length
index"},{"location":"reference/mlir-dialects/Util/#utilbuffersubspan-utilbuffersubspanop","title":"util.buffer.subspan
(Util::BufferSubspanOp)","text":"Returns a reference to a subrange of a buffer
Syntax:
operation ::= `util.buffer.subspan` $source `[` $source_offset `]` `:`\n type($source) `` `{` $source_size `}` `->`\n type($result) `` `{` $result_size `}`\n attr-dict-with-keyword\n
Returns a logical view into an underlying source buffer. This induces aliasing and multiple SSA values may allow access to the same underlying buffer storage.
Subspans are a compiler-only concept and are propagated by an analysis pass to result in absolute offsets on accesses any place the subrange would have been used.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SubrangeOperandOpInterface, TiedOpInterface, Util_SizeAwareOp, Util_SubrangeOp, ViewLikeOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_11","title":"Operands:","text":"Operand Description source
a reference counted byte buffer source_size
index source_offset
index result_size
index"},{"location":"reference/mlir-dialects/Util/#results_9","title":"Results:","text":"Result Description result
a reference counted byte buffer"},{"location":"reference/mlir-dialects/Util/#compiler-hint-ops","title":"Compiler hint ops","text":""},{"location":"reference/mlir-dialects/Util/#utiloptimization_barrier-utiloptimizationbarrierop","title":"util.optimization_barrier
(Util::OptimizationBarrierOp)","text":"Prevents compiler optimizations across a value.
Syntax:
operation ::= `util.optimization_barrier` attr-dict\n ($operands^ `:` type($operands))?\n
Wraps any operands in an unoptimizable identity to prevent its results from being folded. It will be dropped during the final step in compilation and has no effect at runtime.
Traits: SameOperandsAndResultType
"},{"location":"reference/mlir-dialects/Util/#operands_12","title":"Operands:","text":"Operand Description operands
variadic of any type"},{"location":"reference/mlir-dialects/Util/#results_10","title":"Results:","text":"Result Description results
variadic of any type"},{"location":"reference/mlir-dialects/Util/#utilunfoldable_constant-utilunfoldableconstantop","title":"util.unfoldable_constant
(Util::UnfoldableConstantOp)","text":"A constant that cannot be folded by the compiler.
Similar to a std.constant, but is declared as having a side effect and has no folder. This is really just syntactic sugar as it is canonicalized to a std.constant wrapped in an util.optimization_barrier.
"},{"location":"reference/mlir-dialects/Util/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription value
::mlir::Attributeany attribute"},{"location":"reference/mlir-dialects/Util/#results_11","title":"Results:","text":"Result Description \u00abunnamed\u00bb any type"},{"location":"reference/mlir-dialects/Util/#utilunreachable-utilunreachableop","title":"util.unreachable
(Util::UnreachableOp)","text":"Unreachable assertion op
Syntax:
operation ::= `util.unreachable` $message attr-dict\n
Signals to the compiler that the parent block should not be reachable. This may be converted into a runtime assertion, though ideally they are stripped during translation.
^bb0:\n %true = arith.constant true\n cond_br %true, ^bb2, ^bb1\n^bb1:\n // Indicates that this branch should never be taken.\n util.unreachable \"shouldn't be here\"\n^bb2:\n ...\n
Traits: ReturnLike, Terminator
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#data-type-conversion-ops","title":"Data type conversion ops","text":""},{"location":"reference/mlir-dialects/Util/#utilnumericoptional_narrow-utilnumericoptionalnarrowop","title":"util.numeric.optional_narrow
(Util::NumericOptionalNarrowOp)","text":"Memorializes an optional numeric narrowing that is valid
Syntax:
operation ::= `util.numeric.optional_narrow` $operand `:` type($operand) `as` $semantic_type attr-dict\n
Serves as a placeholder for points in the computation where an optional numeric narrowing can be performed without loss of information. Such ops can guide optimization passes wishing to perform precision reduction.
In addition to the operand and result type, this op takes an additional semantic_type
attribute representing the semantic target type which can be: * FloatType * Signed IntegerType * Unsigned IntegerType
Note that this semantic_type
must be a sign-carrying integer if using an integer type and cannot be IndexType (i.e. it can be used to indicate a possible narrowing of an IndexType to a specific integer).
If the operand is a TensorType, then the result must be a TensorType. The semantic_type
constrains the element type.
Optionally, the minimum and maximum integer values (for integer semantic types) are tracked if known.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription semantic_type
::mlir::TypeAttrany type attribute min_value
::mlir::IntegerAttrarbitrary integer attribute max_value
::mlir::IntegerAttrarbitrary integer attribute"},{"location":"reference/mlir-dialects/Util/#operands_13","title":"Operands:","text":"Operand Description operand
signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#results_12","title":"Results:","text":"Result Description result
signless integer or floating-point or tensor of signless integer or floating-point values"},{"location":"reference/mlir-dialects/Util/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/Util/#utilglobaladdress-utilglobaladdressop","title":"util.global.address
(Util::GlobalAddressOp)","text":"Returns an address reference to a global
Syntax:
operation ::= `util.global.address` $global attr-dict `:` qualified(type($result))\n
Returns the address of a global as a typed reference. Can be used with the global load and store indirect ops.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalAddressOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription global
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#results_13","title":"Results:","text":"Result Description result
a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilgloballoadindirect-utilgloballoadindirectop","title":"util.global.load.indirect
(Util::GlobalLoadIndirectOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `util.global.load.indirect` $global attr-dict `:` qualified(type($global)) `->` type($result)\n
Returns a copy of the global variable value.
Interfaces: Util_GlobalLoadIndirectOpInterface
"},{"location":"reference/mlir-dialects/Util/#operands_14","title":"Operands:","text":"Operand Description global
a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#results_14","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utilglobalload-utilgloballoadop","title":"util.global.load
(Util::GlobalLoadOp)","text":"Loads a value from a global variable
Syntax:
operation ::= `util.global.load` $global attr-dict `:` type($result)\n
Returns a global variable value.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription global
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#results_15","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utilglobal-utilglobalop","title":"util.global
(Util::GlobalOp)","text":"Stateful global variable declaration
Syntax:
operation ::= `util.global` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Declares a global variable that maintains its value across invocations. The value is tied to the execution context of the module and different contexts will have different variable storage.
Interfaces: Symbol, Util_GlobalOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
::mlir::TypedAttrTypedAttr instance"},{"location":"reference/mlir-dialects/Util/#utilglobalstoreindirect-utilglobalstoreindirectop","title":"util.global.store.indirect
(Util::GlobalStoreIndirectOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `util.global.store.indirect` $value `,` $global attr-dict `:` type($value) `->` qualified(type($global))\n
Stores a copy of the value into a global variable.
Interfaces: Util_GlobalStoreIndirectOpInterface
"},{"location":"reference/mlir-dialects/Util/#operands_15","title":"Operands:","text":"Operand Description value
any type global
a pointer-like reference"},{"location":"reference/mlir-dialects/Util/#utilglobalstore-utilglobalstoreop","title":"util.global.store
(Util::GlobalStoreOp)","text":"Stores a value into a global variable
Syntax:
operation ::= `util.global.store` $value `,` $global attr-dict `:` type($value)\n
Stores a copy of the value into a global variable.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription global
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/Util/#operands_16","title":"Operands:","text":"Operand Description value
any type"},{"location":"reference/mlir-dialects/Util/#list-ops","title":"List ops","text":"Ops for !util.list<T>
(mostly just a placeholder for now).
"},{"location":"reference/mlir-dialects/Util/#utillistcreate-utillistcreateop","title":"util.list.create
(Util::ListCreateOp)","text":"Creates a new empty list
Syntax:
operation ::= `util.list.create` ($initial_capacity^)? attr-dict `:` qualified(type($result))\n
Creates a new empty list with an optional initial capacity.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}, MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_17","title":"Operands:","text":"Operand Description initial_capacity
index"},{"location":"reference/mlir-dialects/Util/#results_16","title":"Results:","text":"Result Description result
dense list container type"},{"location":"reference/mlir-dialects/Util/#utillistget-utillistgetop","title":"util.list.get
(Util::ListGetOp)","text":"Element accessor
Syntax:
operation ::= `util.list.get` $list `[` $index `]` attr-dict `:` custom<ListTypeGet>(type($list), type($result))\n
Returns the value of the element at the given index. Note that the value may be null if the element is null or the type does not match.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_18","title":"Operands:","text":"Operand Description list
dense list container type index
index"},{"location":"reference/mlir-dialects/Util/#results_17","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utillistresize-utillistresizeop","title":"util.list.resize
(Util::ListResizeOp)","text":"Resizes the list to a new count in elements
Syntax:
operation ::= `util.list.resize` operands attr-dict `:` qualified(type($list))\n
Resizes the list to contain new_size
elements. This will either truncate the list if the existing size is greater than new_size
or extend the list with the default list value of the element type.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_19","title":"Operands:","text":"Operand Description list
dense list container type new_size
index"},{"location":"reference/mlir-dialects/Util/#utillistset-utillistsetop","title":"util.list.set
(Util::ListSetOp)","text":"Element mutator
Syntax:
operation ::= `util.list.set` $list `[` $index `]` `,` $value attr-dict `:` custom<ListTypeSet>(type($list), type($value))\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_20","title":"Operands:","text":"Operand Description list
dense list container type index
index value
any type"},{"location":"reference/mlir-dialects/Util/#utillistsize-utillistsizeop","title":"util.list.size
(Util::ListSizeOp)","text":"The size of the list in elements
Syntax:
operation ::= `util.list.size` operands attr-dict `:` qualified(type($list))\n
Returns the current size of the list in elements.
Interfaces: InferTypeOpInterface, MemoryEffectOpInterface (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/Util/#operands_21","title":"Operands:","text":"Operand Description list
dense list container type"},{"location":"reference/mlir-dialects/Util/#results_18","title":"Results:","text":"Result Description result
index"},{"location":"reference/mlir-dialects/Util/#range-arithmetic-ops","title":"Range arithmetic ops","text":""},{"location":"reference/mlir-dialects/Util/#utilrangeextents-utilrangeextentsop","title":"util.range.extents
(Util::RangeExtentsOp)","text":"Returns the min/max of a union of a set of ranges
Syntax:
operation ::= `util.range.extents` custom<RangeList>($offsets, $lengths) attr-dict `:` type($min)\n
Computes min(offsets) and max(offsets + lengths). Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType, SameVariadicOperandSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_22","title":"Operands:","text":"Operand Description offsets
variadic of index or integer lengths
variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_19","title":"Results:","text":"Result Description min
index or integer max
index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemax-utilrangemaxop","title":"util.range.max
(Util::RangeMaxOp)","text":"Returns the max of all values
Syntax:
operation ::= `util.range.max` $operands attr-dict `:` type($result)\n
Computes the max of a variadic list of operands. Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType, SameVariadicOperandSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_23","title":"Operands:","text":"Operand Description operands
variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_20","title":"Results:","text":"Result Description result
index or integer"},{"location":"reference/mlir-dialects/Util/#utilrangemin-utilrangeminop","title":"util.range.min
(Util::RangeMinOp)","text":"Returns the min of all values
Syntax:
operation ::= `util.range.min` $operands attr-dict `:` type($result)\n
Computes the min of a variadic list of operands. Though it's possible to express this with standard arithmetic this op enables more semantically meaningful folding/optimizations.
Traits: AlwaysSpeculatableImplTrait, SameOperandsAndResultType, SameVariadicOperandSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_24","title":"Operands:","text":"Operand Description operands
variadic of index or integer"},{"location":"reference/mlir-dialects/Util/#results_21","title":"Results:","text":"Result Description result
index or integer"},{"location":"reference/mlir-dialects/Util/#status-ops","title":"Status ops","text":""},{"location":"reference/mlir-dialects/Util/#utilstatuscheck_ok-utilstatuscheckokop","title":"util.status.check_ok
(Util::StatusCheckOkOp)","text":"Raises a global failure if a status is not 'ok'
Syntax:
operation ::= `util.status.check_ok` $status (`,` $message^)? attr-dict\n
When the status is not 'ok' this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. It's also possible that other work that is in-flight or pending when the failure occurs will complete.
"},{"location":"reference/mlir-dialects/Util/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/Util/#operands_25","title":"Operands:","text":"Operand Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/Util/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/Util/#utilinitializer-utilinitializerop","title":"util.initializer
(Util::InitializerOp)","text":"Global initialization function
A function that is called in definition order upon module initialization. Must not load any globals that are defined or initialized after it in the module.
Traits: IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, Util_InitializerOpInterface
"},{"location":"reference/mlir-dialects/Util/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/Util/#utilinitializerreturn-utilinitializerreturnop","title":"util.initializer.return
(Util::InitializerReturnOp)","text":"Return from a util.initializer
Syntax:
operation ::= `util.initializer.return` attr-dict\n
Returns control from an initializer function.
Traits: AlwaysSpeculatableImplTrait, HasParent, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#type-manipulation-ops","title":"Type manipulation ops","text":""},{"location":"reference/mlir-dialects/Util/#utilcast-utilcastop","title":"util.cast
(Util::CastOp)","text":"Casts one util type to another ala static_cast/dynamic_cast
Syntax:
operation ::= `util.cast` $operand attr-dict `:` type($operand) `to` type($result)\n
Performs a type cast between object types known to the util dialect.
Traits: AlwaysSpeculatableImplTrait
Interfaces: CastOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), TiedOpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_26","title":"Operands:","text":"Operand Description operand
any type"},{"location":"reference/mlir-dialects/Util/#results_22","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#utilcmpeq-utilcmpeqop","title":"util.cmp.eq
(Util::CmpEQOp)","text":"Compares two values for equality
Syntax:
operation ::= `util.cmp.eq` operands attr-dict `:` type($lhs)\n
Compares two operands for equality. This is intended for comparing IREE reference types (like !util.buffer) that cannot be used with std.cmpi.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_27","title":"Operands:","text":"Operand Description lhs
any type rhs
any type"},{"location":"reference/mlir-dialects/Util/#results_23","title":"Results:","text":"Result Description result
1-bit signless integer"},{"location":"reference/mlir-dialects/Util/#utilnull-utilnullop","title":"util.null
(Util::NullOp)","text":"Returns a null type value
Syntax:
operation ::= `util.null` attr-dict `:` type($result)\n
Defines an SSA value that is lowered into dialects supporting null/undefined/optional/etc values.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#results_24","title":"Results:","text":"Result Description result
any type"},{"location":"reference/mlir-dialects/Util/#value-utility-ops","title":"Value utility ops","text":""},{"location":"reference/mlir-dialects/Util/#utilswitch-utilswitchop","title":"util.switch
(Util::SwitchOp)","text":"Primitive switch operation
Syntax:
operation ::= `util.switch` type($default_value) `from`\n custom<TypedValueList>(ref(type($default_value)), $values, type($values))\n `at` $index\n `else` $default_value\n attr-dict\n `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = util.switch %index[%c100, %c200, %c300] else %c5 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/Util/#operands_28","title":"Operands:","text":"Operand Description index
index default_value
index or integer or floating-point values
variadic of index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#results_25","title":"Results:","text":"Result Description result
index or integer or floating-point"},{"location":"reference/mlir-dialects/Util/#type-definition","title":"Type definition","text":""},{"location":"reference/mlir-dialects/Util/#buffertype","title":"BufferType","text":"a reference counted byte buffer
Syntax: !util.buffer
A reference counted byte buffer that models a pointer, offset, and length.
"},{"location":"reference/mlir-dialects/Util/#listtype","title":"ListType","text":"dense list container type
Syntax:
!util.list<\n Type # element_type\n>\n
Typed container supporting variant storage.
"},{"location":"reference/mlir-dialects/Util/#parameters","title":"Parameters:","text":"Parameter C++ type Description element_type Type
"},{"location":"reference/mlir-dialects/Util/#objecttype","title":"ObjectType","text":"a placeholder for an unspecified object type
Syntax: !util.object
Describes a runtime object type. These may be reference counted or garbage collected at runtime.
"},{"location":"reference/mlir-dialects/Util/#ptrtype","title":"PtrType","text":"a pointer-like reference
Syntax:
!util.ptr<\n Type # target_type\n>\n
A typed indirect reference to a value. These define a runtime addressable value that is strongly referenced.
"},{"location":"reference/mlir-dialects/Util/#parameters_1","title":"Parameters:","text":"Parameter C++ type Description target_type Type
"},{"location":"reference/mlir-dialects/Util/#varianttype","title":"VariantType","text":"a placeholder for a variant type (?
)
Syntax: !util.variant
Describes a runtime variant type. These may be primitives (i32, f32, etc) or object types.
"},{"location":"reference/mlir-dialects/VM/","title":"VM","text":""},{"location":"reference/mlir-dialects/VM/#vm-dialect","title":"'vm' Dialect","text":"A dialect representing operations against an abstract virtual machine.
The virtual machine ops are designed to be either serialized to a bytecode representation that can be interpreted at runtime or lowered further to static representations such as LLVM IR, C, etc. The idea is that the types and operations performed are generally just encoding resource ownership rules and control flow that can be represented in many different ways by target runtimes. For example, it should be possible to lower the VM dialect to SPIR-V and run the VM entirely within a persistent Vulkan kernel.
With this scalable runtime approach we make some limiting assumptions to keep the required implementations simple. As we assume all real math is happening within dispatch regions the only math we provide is scalar operations used for offset and shape calculations. This also enables simple flow control such as fixed-range loops.
Besides integer values the only other storage type is a variant reference modeling an abstract iree_vm_ref_t. This allows automated reference counting to be relied upon by other dialects built on top of the VM dialect and avoids the need for more verbose manual reference counting logic (that may be difficult or impossible to manage given the coroutine-like nature of the VM). Lowering targets can insert the reference counting as needed.
The types in the VM dialect correspond to the storage rather than value type, with the interpretation of the type encoded on the op.
- 'vm' Dialect
- Operation definition
- Async/fiber ops
- vm.yield (VM::YieldOp)
- Bitwise shift and rotate ops
- vm.shl.i32 (VM::ShlI32Op)
- vm.shl.i64 (VM::ShlI64Op)
- vm.shr.i32.s (VM::ShrI32SOp)
- vm.shr.i32.u (VM::ShrI32UOp)
- vm.shr.i64.s (VM::ShrI64SOp)
- vm.shr.i64.u (VM::ShrI64UOp)
- Buffer ops
- vm.buffer.alloc (VM::BufferAllocOp)
- vm.buffer.clone (VM::BufferCloneOp)
- vm.buffer.compare (VM::BufferCompareOp)
- vm.buffer.copy (VM::BufferCopyOp)
- vm.buffer.fill.f32 (VM::BufferFillF32Op)
- vm.buffer.fill.f64 (VM::BufferFillF64Op)
- vm.buffer.fill.i16 (VM::BufferFillI16Op)
- vm.buffer.fill.i32 (VM::BufferFillI32Op)
- vm.buffer.fill.i64 (VM::BufferFillI64Op)
- vm.buffer.fill.i8 (VM::BufferFillI8Op)
- vm.buffer.length (VM::BufferLengthOp)
- vm.buffer.load.f32 (VM::BufferLoadF32Op)
- vm.buffer.load.f64 (VM::BufferLoadF64Op)
- vm.buffer.load.i16.s (VM::BufferLoadI16SOp)
- vm.buffer.load.i16.u (VM::BufferLoadI16UOp)
- vm.buffer.load.i32 (VM::BufferLoadI32Op)
- vm.buffer.load.i64 (VM::BufferLoadI64Op)
- vm.buffer.load.i8.s (VM::BufferLoadI8SOp)
- vm.buffer.load.i8.u (VM::BufferLoadI8UOp)
- vm.buffer.store.f32 (VM::BufferStoreF32Op)
- vm.buffer.store.f64 (VM::BufferStoreF64Op)
- vm.buffer.store.i16 (VM::BufferStoreI16Op)
- vm.buffer.store.i32 (VM::BufferStoreI32Op)
- vm.buffer.store.i64 (VM::BufferStoreI64Op)
- vm.buffer.store.i8 (VM::BufferStoreI8Op)
- Casting and conversion ops
- vm.bitcast.f32.i32 (VM::BitcastF32I32Op)
- vm.bitcast.f64.i64 (VM::BitcastF64I64Op)
- vm.bitcast.i32.f32 (VM::BitcastI32F32Op)
- vm.bitcast.i64.f64 (VM::BitcastI64F64Op)
- vm.cast.any.ref (VM::CastAnyRefOp)
- vm.cast.f32.si32 (VM::CastF32SI32Op)
- vm.cast.f32.ui32 (VM::CastF32UI32Op)
- vm.cast.ref.any (VM::CastRefAnyOp)
- vm.cast.si32.f32 (VM::CastSI32F32Op)
- vm.cast.ui32.f32 (VM::CastUI32F32Op)
- vm.ext.f32.f64 (VM::ExtF32F64Op)
- vm.ext.i16.i32.s (VM::ExtI16I32SOp)
- vm.ext.i16.i32.u (VM::ExtI16I32UOp)
- vm.ext.i16.i64.s (VM::ExtI16I64SOp)
- vm.ext.i16.i64.u (VM::ExtI16I64UOp)
- vm.ext.i32.i64.s (VM::ExtI32I64SOp)
- vm.ext.i32.i64.u (VM::ExtI32I64UOp)
- vm.ext.i8.i32.s (VM::ExtI8I32SOp)
- vm.ext.i8.i32.u (VM::ExtI8I32UOp)
- vm.ext.i8.i64.s (VM::ExtI8I64SOp)
- vm.ext.i8.i64.u (VM::ExtI8I64UOp)
- vm.trunc.f64.f32 (VM::TruncF64F32Op)
- vm.trunc.i16.i8 (VM::TruncI16I8Op)
- vm.trunc.i32.i16 (VM::TruncI32I16Op)
- vm.trunc.i32.i8 (VM::TruncI32I8Op)
- vm.trunc.i64.i16 (VM::TruncI64I16Op)
- vm.trunc.i64.i32 (VM::TruncI64I32Op)
- vm.trunc.i64.i8 (VM::TruncI64I8Op)
- Comparison ops
- vm.cmp.eq.i32 (VM::CmpEQI32Op)
- vm.cmp.eq.i64 (VM::CmpEQI64Op)
- vm.cmp.gte.i32.s (VM::CmpGTEI32SOp)
- vm.cmp.gte.i32.u (VM::CmpGTEI32UOp)
- vm.cmp.gte.i64.s (VM::CmpGTEI64SOp)
- vm.cmp.gte.i64.u (VM::CmpGTEI64UOp)
- vm.cmp.gt.i32.s (VM::CmpGTI32SOp)
- vm.cmp.gt.i32.u (VM::CmpGTI32UOp)
- vm.cmp.gt.i64.s (VM::CmpGTI64SOp)
- vm.cmp.gt.i64.u (VM::CmpGTI64UOp)
- vm.cmp.lte.i32.s (VM::CmpLTEI32SOp)
- vm.cmp.lte.i32.u (VM::CmpLTEI32UOp)
- vm.cmp.lte.i64.s (VM::CmpLTEI64SOp)
- vm.cmp.lte.i64.u (VM::CmpLTEI64UOp)
- vm.cmp.lt.i32.s (VM::CmpLTI32SOp)
- vm.cmp.lt.i32.u (VM::CmpLTI32UOp)
- vm.cmp.lt.i64.s (VM::CmpLTI64SOp)
- vm.cmp.lt.i64.u (VM::CmpLTI64UOp)
- vm.cmp.ne.i32 (VM::CmpNEI32Op)
- vm.cmp.ne.i64 (VM::CmpNEI64Op)
- vm.cmp.nz.i32 (VM::CmpNZI32Op)
- vm.cmp.nz.i64 (VM::CmpNZI64Op)
- Conditional assignment ops
- vm.select.f32 (VM::SelectF32Op)
- vm.select.f64 (VM::SelectF64Op)
- vm.select.i32 (VM::SelectI32Op)
- vm.select.i64 (VM::SelectI64Op)
- vm.select.ref (VM::SelectRefOp)
- vm.switch.f32 (VM::SwitchF32Op)
- vm.switch.f64 (VM::SwitchF64Op)
- vm.switch.i32 (VM::SwitchI32Op)
- vm.switch.i64 (VM::SwitchI64Op)
- vm.switch.ref (VM::SwitchRefOp)
- Constant ops
- vm.const.f32 (VM::ConstF32Op)
- vm.const.f32.zero (VM::ConstF32ZeroOp)
- vm.const.f64 (VM::ConstF64Op)
- vm.const.f64.zero (VM::ConstF64ZeroOp)
- vm.const.i32 (VM::ConstI32Op)
- vm.const.i32.zero (VM::ConstI32ZeroOp)
- vm.const.i64 (VM::ConstI64Op)
- vm.const.i64.zero (VM::ConstI64ZeroOp)
- vm.const.ref.rodata (VM::ConstRefRodataOp)
- vm.const.ref.zero (VM::ConstRefZeroOp)
- vm.rodata.inline (VM::RodataInlineOp)
- vm.rodata (VM::RodataOp)
- Control flow ops
- vm.br (VM::BranchOp)
- vm.br_table (VM::BranchTableOp)
- vm.call (VM::CallOp)
- vm.call.variadic (VM::CallVariadicOp)
- vm.check.eq (VM::CheckEQOp)
- vm.check.ne (VM::CheckNEOp)
- vm.check.nz (VM::CheckNZOp)
- vm.check.nearly_eq (VM::CheckNearlyEQOp)
- vm.cond_br (VM::CondBranchOp)
- vm.cond_fail (VM::CondFailOp)
- vm.fail (VM::FailOp)
- vm.import.resolved (VM::ImportResolvedOp)
- vm.return (VM::ReturnOp)
- Debugging ops
- vm.break (VM::BreakOp)
- vm.cond_break (VM::CondBreakOp)
- vm.print (VM::PrintOp)
- vm.trace (VM::TraceOp)
- Floating-point arithmetic ops
- vm.abs.f32 (VM::AbsF32Op)
- vm.abs.f64 (VM::AbsF64Op)
- vm.add.f32 (VM::AddF32Op)
- vm.add.f64 (VM::AddF64Op)
- vm.ceil.f32 (VM::CeilF32Op)
- vm.ceil.f64 (VM::CeilF64Op)
- vm.div.f32 (VM::DivF32Op)
- vm.div.f64 (VM::DivF64Op)
- vm.fma.f32 (VM::FMAF32Op)
- vm.fma.f64 (VM::FMAF64Op)
- vm.floor.f32 (VM::FloorF32Op)
- vm.floor.f64 (VM::FloorF64Op)
- vm.max.f32 (VM::MaxF32Op)
- vm.max.f64 (VM::MaxF64Op)
- vm.min.f32 (VM::MinF32Op)
- vm.min.f64 (VM::MinF64Op)
- vm.mul.f32 (VM::MulF32Op)
- vm.mul.f64 (VM::MulF64Op)
- vm.neg.f32 (VM::NegF32Op)
- vm.neg.f64 (VM::NegF64Op)
- vm.rem.f32 (VM::RemF32Op)
- vm.rem.f64 (VM::RemF64Op)
- vm.round.f32.even (VM::RoundF32EvenOp)
- vm.round.f32 (VM::RoundF32Op)
- vm.round.f64.even (VM::RoundF64EvenOp)
- vm.round.f64 (VM::RoundF64Op)
- vm.sub.f32 (VM::SubF32Op)
- vm.sub.f64 (VM::SubF64Op)
- Floating-point comparison ops
- vm.cmp.eq.f32.near (VM::CmpEQF32NearOp)
- vm.cmp.eq.f32.o (VM::CmpEQF32OOp)
- vm.cmp.eq.f32.u (VM::CmpEQF32UOp)
- vm.cmp.eq.f64.near (VM::CmpEQF64NearOp)
- vm.cmp.eq.f64.o (VM::CmpEQF64OOp)
- vm.cmp.eq.f64.u (VM::CmpEQF64UOp)
- vm.cmp.gte.f32.o (VM::CmpGTEF32OOp)
- vm.cmp.gte.f32.u (VM::CmpGTEF32UOp)
- vm.cmp.gte.f64.o (VM::CmpGTEF64OOp)
- vm.cmp.gte.f64.u (VM::CmpGTEF64UOp)
- vm.cmp.gt.f32.o (VM::CmpGTF32OOp)
- vm.cmp.gt.f32.u (VM::CmpGTF32UOp)
- vm.cmp.gt.f64.o (VM::CmpGTF64OOp)
- vm.cmp.gt.f64.u (VM::CmpGTF64UOp)
- vm.cmp.lte.f32.o (VM::CmpLTEF32OOp)
- vm.cmp.lte.f32.u (VM::CmpLTEF32UOp)
- vm.cmp.lte.f64.o (VM::CmpLTEF64OOp)
- vm.cmp.lte.f64.u (VM::CmpLTEF64UOp)
- vm.cmp.lt.f32.o (VM::CmpLTF32OOp)
- vm.cmp.lt.f32.u (VM::CmpLTF32UOp)
- vm.cmp.lt.f64.o (VM::CmpLTF64OOp)
- vm.cmp.lt.f64.u (VM::CmpLTF64UOp)
- vm.cmp.ne.f32.o (VM::CmpNEF32OOp)
- vm.cmp.ne.f32.u (VM::CmpNEF32UOp)
- vm.cmp.ne.f64.o (VM::CmpNEF64OOp)
- vm.cmp.ne.f64.u (VM::CmpNEF64UOp)
- vm.cmp.nz.f32.o (VM::CmpNZF32OOp)
- vm.cmp.nz.f32.u (VM::CmpNZF32UOp)
- vm.cmp.nz.f64.o (VM::CmpNZF64OOp)
- vm.cmp.nz.f64.u (VM::CmpNZF64UOp)
- vm.cmp.nan.f32 (VM::CmpNaNF32Op)
- vm.cmp.nan.f64 (VM::CmpNaNF64Op)
- Floating-point math ops
- vm.atan2.f32 (VM::Atan2F32Op)
- vm.atan2.f64 (VM::Atan2F64Op)
- vm.atan.f32 (VM::AtanF32Op)
- vm.atan.f64 (VM::AtanF64Op)
- vm.cos.f32 (VM::CosF32Op)
- vm.cos.f64 (VM::CosF64Op)
- vm.erf.f32 (VM::ErfF32Op)
- vm.erf.f64 (VM::ErfF64Op)
- vm.exp2.f32 (VM::Exp2F32Op)
- vm.exp2.f64 (VM::Exp2F64Op)
- vm.exp.f32 (VM::ExpF32Op)
- vm.exp.f64 (VM::ExpF64Op)
- vm.expm1.f32 (VM::ExpM1F32Op)
- vm.expm1.f64 (VM::ExpM1F64Op)
- vm.log10.f32 (VM::Log10F32Op)
- vm.log10.f64 (VM::Log10F64Op)
- vm.log1p.f32 (VM::Log1pF32Op)
- vm.log1p.f64 (VM::Log1pF64Op)
- vm.log2.f32 (VM::Log2F32Op)
- vm.log2.f64 (VM::Log2F64Op)
- vm.log.f32 (VM::LogF32Op)
- vm.log.f64 (VM::LogF64Op)
- vm.pow.f32 (VM::PowF32Op)
- vm.pow.f64 (VM::PowF64Op)
- vm.rsqrt.f32 (VM::RsqrtF32Op)
- vm.rsqrt.f64 (VM::RsqrtF64Op)
- vm.sin.f32 (VM::SinF32Op)
- vm.sin.f64 (VM::SinF64Op)
- vm.sqrt.f32 (VM::SqrtF32Op)
- vm.sqrt.f64 (VM::SqrtF64Op)
- vm.tanh.f32 (VM::TanhF32Op)
- vm.tanh.f64 (VM::TanhF64Op)
- Global ops
- vm.global.address (VM::GlobalAddressOp)
- vm.global.f32 (VM::GlobalF32Op)
- vm.global.f64 (VM::GlobalF64Op)
- vm.global.i32 (VM::GlobalI32Op)
- vm.global.i64 (VM::GlobalI64Op)
- vm.global.load.f32 (VM::GlobalLoadF32Op)
- vm.global.load.f64 (VM::GlobalLoadF64Op)
- vm.global.load.i32 (VM::GlobalLoadI32Op)
- vm.global.load.i64 (VM::GlobalLoadI64Op)
- vm.global.load.indirect.f32 (VM::GlobalLoadIndirectF32Op)
- vm.global.load.indirect.f64 (VM::GlobalLoadIndirectF64Op)
- vm.global.load.indirect.i32 (VM::GlobalLoadIndirectI32Op)
- vm.global.load.indirect.i64 (VM::GlobalLoadIndirectI64Op)
- vm.global.load.indirect.ref (VM::GlobalLoadIndirectRefOp)
- vm.global.load.ref (VM::GlobalLoadRefOp)
- vm.global.ref (VM::GlobalRefOp)
- vm.global.store.f32 (VM::GlobalStoreF32Op)
- vm.global.store.f64 (VM::GlobalStoreF64Op)
- vm.global.store.i32 (VM::GlobalStoreI32Op)
- vm.global.store.i64 (VM::GlobalStoreI64Op)
- vm.global.store.indirect.f32 (VM::GlobalStoreIndirectF32Op)
- vm.global.store.indirect.f64 (VM::GlobalStoreIndirectF64Op)
- vm.global.store.indirect.i32 (VM::GlobalStoreIndirectI32Op)
- vm.global.store.indirect.i64 (VM::GlobalStoreIndirectI64Op)
- vm.global.store.indirect.ref (VM::GlobalStoreIndirectRefOp)
- vm.global.store.ref (VM::GlobalStoreRefOp)
- Integer arithmetic ops
- vm.abs.i32 (VM::AbsI32Op)
- vm.abs.i64 (VM::AbsI64Op)
- vm.add.i32 (VM::AddI32Op)
- vm.add.i64 (VM::AddI64Op)
- vm.div.i32.s (VM::DivI32SOp)
- vm.div.i32.u (VM::DivI32UOp)
- vm.div.i64.s (VM::DivI64SOp)
- vm.div.i64.u (VM::DivI64UOp)
- vm.fma.i32 (VM::FMAI32Op)
- vm.fma.i64 (VM::FMAI64Op)
- vm.max.i32.s (VM::MaxI32SOp)
- vm.max.i32.u (VM::MaxI32UOp)
- vm.max.i64.s (VM::MaxI64SOp)
- vm.max.i64.u (VM::MaxI64UOp)
- vm.min.i32.s (VM::MinI32SOp)
- vm.min.i32.u (VM::MinI32UOp)
- vm.min.i64.s (VM::MinI64SOp)
- vm.min.i64.u (VM::MinI64UOp)
- vm.mul.i32 (VM::MulI32Op)
- vm.mul.i64 (VM::MulI64Op)
- vm.rem.i32.s (VM::RemI32SOp)
- vm.rem.i32.u (VM::RemI32UOp)
- vm.rem.i64.s (VM::RemI64SOp)
- vm.rem.i64.u (VM::RemI64UOp)
- vm.sub.i32 (VM::SubI32Op)
- vm.sub.i64 (VM::SubI64Op)
- Integer bit manipulation ops
- vm.and.i32 (VM::AndI32Op)
- vm.and.i64 (VM::AndI64Op)
- vm.ctlz.i32 (VM::CtlzI32Op)
- vm.ctlz.i64 (VM::CtlzI64Op)
- vm.not.i32 (VM::NotI32Op)
- vm.not.i64 (VM::NotI64Op)
- vm.or.i32 (VM::OrI32Op)
- vm.or.i64 (VM::OrI64Op)
- vm.xor.i32 (VM::XorI32Op)
- vm.xor.i64 (VM::XorI64Op)
- List ops
- vm.list.alloc (VM::ListAllocOp)
- vm.list.get.f32 (VM::ListGetF32Op)
- vm.list.get.f64 (VM::ListGetF64Op)
- vm.list.get.i32 (VM::ListGetI32Op)
- vm.list.get.i64 (VM::ListGetI64Op)
- vm.list.get.ref (VM::ListGetRefOp)
- vm.list.reserve (VM::ListReserveOp)
- vm.list.resize (VM::ListResizeOp)
- vm.list.set.f32 (VM::ListSetF32Op)
- vm.list.set.f64 (VM::ListSetF64Op)
- vm.list.set.i32 (VM::ListSetI32Op)
- vm.list.set.i64 (VM::ListSetI64Op)
- vm.list.set.ref (VM::ListSetRefOp)
- vm.list.size (VM::ListSizeOp)
- Ref comparison ops
- vm.cmp.eq.ref (VM::CmpEQRefOp)
- vm.cmp.ne.ref (VM::CmpNERefOp)
- vm.cmp.nz.ref (VM::CmpNZRefOp)
- Structural ops
- vm.export (VM::ExportOp)
- vm.func (VM::FuncOp)
- vm.import (VM::ImportOp)
- vm.initializer (VM::InitializerOp)
- vm.module (VM::ModuleOp)
- vm.module_terminator (VM::ModuleTerminatorOp)
- Attribute definition
- OrdinalCountsAttr
"},{"location":"reference/mlir-dialects/VM/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/VM/#asyncfiber-ops","title":"Async/fiber ops","text":""},{"location":"reference/mlir-dialects/VM/#vmyield-vmyieldop","title":"vm.yield
(VM::YieldOp)","text":"Unconditional fiber yield operation
Syntax:
operation ::= `vm.yield` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n
Yields the fiber for some (likely short) amount of time. This can be used to perform cooperative scheduling and ensure fair (enough) execution. Execution resumes at the specified target branch.
^bb0: vm.yield ^on_resume ^on_resume: ...
Traits: HasParent, Terminator, Util_YieldPoint
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands","title":"Operands:","text":"Operand Description destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#bitwise-shift-and-rotate-ops","title":"Bitwise shift and rotate ops","text":""},{"location":"reference/mlir-dialects/VM/#vmshli32-vmshli32op","title":"vm.shl.i32
(VM::ShlI32Op)","text":"Integer shift left operation
Syntax:
operation ::= `vm.shl.i32` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_1","title":"Operands:","text":"Operand Description operand
32-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshli64-vmshli64op","title":"vm.shl.i64
(VM::ShlI64Op)","text":"Integer shift left operation
Syntax:
operation ::= `vm.shl.i64` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_2","title":"Operands:","text":"Operand Description operand
64-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_1","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32s-vmshri32sop","title":"vm.shr.i32.s
(VM::ShrI32SOp)","text":"Signed integer (arithmetic) shift right operation
Syntax:
operation ::= `vm.shr.i32.s` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_3","title":"Operands:","text":"Operand Description operand
32-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_2","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri32u-vmshri32uop","title":"vm.shr.i32.u
(VM::ShrI32UOp)","text":"Unsigned integer (logical) shift right operation
Syntax:
operation ::= `vm.shr.i32.u` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_4","title":"Operands:","text":"Operand Description operand
32-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_3","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64s-vmshri64sop","title":"vm.shr.i64.s
(VM::ShrI64SOp)","text":"Signed integer (arithmetic) shift right operation
Syntax:
operation ::= `vm.shr.i64.s` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_5","title":"Operands:","text":"Operand Description operand
64-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_4","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmshri64u-vmshri64uop","title":"vm.shr.i64.u
(VM::ShrI64UOp)","text":"Unsigned integer (logical) shift right operation
Syntax:
operation ::= `vm.shr.i64.u` $operand `,` $amount attr-dict `:` type($operand)\n
Shifts the operand in a direction by the number of bits specified.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_6","title":"Operands:","text":"Operand Description operand
64-bit signless integer amount
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_5","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#buffer-ops","title":"Buffer ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbufferalloc-vmbufferallocop","title":"vm.buffer.alloc
(VM::BufferAllocOp)","text":"Allocates a new zero-initialized buffer
Syntax:
operation ::= `vm.buffer.alloc` operands attr-dict `:` type($result)\n
Allocates a new zero-initialized buffer with the given size in bytes.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_7","title":"Operands:","text":"Operand Description length
64-bit signless integer alignment
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_6","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmbufferclone-vmbuffercloneop","title":"vm.buffer.clone
(VM::BufferCloneOp)","text":"Clones a buffer
Syntax:
operation ::= `vm.buffer.clone` operands attr-dict `:` type($source_buffer) `->` type($result)\n
Clones a range of the source buffer to produce a mutable buffer with the same contents.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_8","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer length
64-bit signless integer alignment
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_7","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmbuffercompare-vmbuffercompareop","title":"vm.buffer.compare
(VM::BufferCompareOp)","text":"Compares a range of a buffer to another
Syntax:
operation ::= `vm.buffer.compare` operands attr-dict `:` type($lhs_buffer) `,` type($rhs_buffer)\n
Returns 1 if the two ranges are bitwise equivalent, somewhat like memcmp.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_9","title":"Operands:","text":"Operand Description lhs_buffer
ref lhs_offset
64-bit signless integer rhs_buffer
ref rhs_offset
64-bit signless integer length
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_8","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbuffercopy-vmbuffercopyop","title":"vm.buffer.copy
(VM::BufferCopyOp)","text":"Copies a range of a buffer to another
Syntax:
operation ::= `vm.buffer.copy` operands attr-dict `:` type($source_buffer) `->` type($target_buffer)\n
Copies a range of one buffer to another, like memcpy.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_10","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf32-vmbufferfillf32op","title":"vm.buffer.fill.f32
(VM::BufferFillF32Op)","text":"Fills the buffer with the given repeating 32-bit value
Syntax:
operation ::= `vm.buffer.fill.f32` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_11","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfillf64-vmbufferfillf64op","title":"vm.buffer.fill.f64
(VM::BufferFillF64Op)","text":"Fills the buffer with the given repeating 64-bit value
Syntax:
operation ::= `vm.buffer.fill.f64` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_12","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli16-vmbufferfilli16op","title":"vm.buffer.fill.i16
(VM::BufferFillI16Op)","text":"Fills the buffer with the given repeating 16-bit value
Syntax:
operation ::= `vm.buffer.fill.i16` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_13","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
16-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli32-vmbufferfilli32op","title":"vm.buffer.fill.i32
(VM::BufferFillI32Op)","text":"Fills the buffer with the given repeating 32-bit value
Syntax:
operation ::= `vm.buffer.fill.i32` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_14","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli64-vmbufferfilli64op","title":"vm.buffer.fill.i64
(VM::BufferFillI64Op)","text":"Fills the buffer with the given repeating 64-bit value
Syntax:
operation ::= `vm.buffer.fill.i64` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_15","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferfilli8-vmbufferfilli8op","title":"vm.buffer.fill.i8
(VM::BufferFillI8Op)","text":"Fills the buffer with the given repeating 8-bit value
Syntax:
operation ::= `vm.buffer.fill.i8` $target_buffer `,` $target_offset `,` $length `,` $value\n attr-dict `:` type($value) `->` type($target_buffer)\n
Fills an element range of the buffer with the given value, like memset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_16","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer length
64-bit signless integer value
8-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferlength-vmbufferlengthop","title":"vm.buffer.length
(VM::BufferLengthOp)","text":"Returns the byte length of a buffer
Syntax:
operation ::= `vm.buffer.length` operands attr-dict `:` type($buffer) `->` type($result)\n
Returns the total byte length of the given buffer. This is the exact value as specified during buffer allocation though the underlying system buffer may have additional padding.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_17","title":"Operands:","text":"Operand Description buffer
ref"},{"location":"reference/mlir-dialects/VM/#results_9","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf32-vmbufferloadf32op","title":"vm.buffer.load.f32
(VM::BufferLoadF32Op)","text":"32-bit floating-point load
Syntax:
operation ::= `vm.buffer.load.f32` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_18","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_10","title":"Results:","text":"Result Description result
32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadf64-vmbufferloadf64op","title":"vm.buffer.load.f64
(VM::BufferLoadF64Op)","text":"64-bit floating-point load
Syntax:
operation ::= `vm.buffer.load.f64` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_19","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_11","title":"Results:","text":"Result Description result
64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16s-vmbufferloadi16sop","title":"vm.buffer.load.i16.s
(VM::BufferLoadI16SOp)","text":"Signed 16-bit integer load
Syntax:
operation ::= `vm.buffer.load.i16.s` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_20","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_12","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi16u-vmbufferloadi16uop","title":"vm.buffer.load.i16.u
(VM::BufferLoadI16UOp)","text":"Unsigned 16-bit integer load
Syntax:
operation ::= `vm.buffer.load.i16.u` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_21","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_13","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi32-vmbufferloadi32op","title":"vm.buffer.load.i32
(VM::BufferLoadI32Op)","text":"32-bit integer load
Syntax:
operation ::= `vm.buffer.load.i32` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_22","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_14","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi64-vmbufferloadi64op","title":"vm.buffer.load.i64
(VM::BufferLoadI64Op)","text":"64-bit integer load
Syntax:
operation ::= `vm.buffer.load.i64` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_23","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_15","title":"Results:","text":"Result Description result
64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8s-vmbufferloadi8sop","title":"vm.buffer.load.i8.s
(VM::BufferLoadI8SOp)","text":"Signed 8-bit integer load
Syntax:
operation ::= `vm.buffer.load.i8.s` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_24","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_16","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferloadi8u-vmbufferloadi8uop","title":"vm.buffer.load.i8.u
(VM::BufferLoadI8UOp)","text":"Unsigned 8-bit integer load
Syntax:
operation ::= `vm.buffer.load.i8.u` $source_buffer `[` $source_offset `]`\n attr-dict `:` type($source_buffer) `->` type($result)\n
Loads a value from the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_25","title":"Operands:","text":"Operand Description source_buffer
ref source_offset
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_17","title":"Results:","text":"Result Description result
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref32-vmbufferstoref32op","title":"vm.buffer.store.f32
(VM::BufferStoreF32Op)","text":"32-bit floating-point store
Syntax:
operation ::= `vm.buffer.store.f32` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_26","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstoref64-vmbufferstoref64op","title":"vm.buffer.store.f64
(VM::BufferStoreF64Op)","text":"64-bit floating-point store
Syntax:
operation ::= `vm.buffer.store.f64` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_27","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
64-bit float or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei16-vmbufferstorei16op","title":"vm.buffer.store.i16
(VM::BufferStoreI16Op)","text":"Unsigned 16-bit integer store
Syntax:
operation ::= `vm.buffer.store.i16` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_28","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei32-vmbufferstorei32op","title":"vm.buffer.store.i32
(VM::BufferStoreI32Op)","text":"32-bit integer store
Syntax:
operation ::= `vm.buffer.store.i32` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_29","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei64-vmbufferstorei64op","title":"vm.buffer.store.i64
(VM::BufferStoreI64Op)","text":"64-bit integer store
Syntax:
operation ::= `vm.buffer.store.i64` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_30","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
64-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbufferstorei8-vmbufferstorei8op","title":"vm.buffer.store.i8
(VM::BufferStoreI8Op)","text":"Unsigned 8-bit integer store
Syntax:
operation ::= `vm.buffer.store.i8` $value `,` $target_buffer `[` $target_offset `]`\n attr-dict `:` type($value) `->` type($target_buffer)\n
Stores a value to the buffer at the given element offset.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_31","title":"Operands:","text":"Operand Description target_buffer
ref target_offset
64-bit signless integer value
32-bit signless integer or 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#casting-and-conversion-ops","title":"Casting and conversion ops","text":"Casting and type conversion/emulation.
"},{"location":"reference/mlir-dialects/VM/#vmbitcastf32i32-vmbitcastf32i32op","title":"vm.bitcast.f32.i32
(VM::BitcastF32I32Op)","text":"Bitcast from a 32-bit floating-point value to a 32-bit integer
Syntax:
operation ::= `vm.bitcast.f32.i32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_32","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_18","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcastf64i64-vmbitcastf64i64op","title":"vm.bitcast.f64.i64
(VM::BitcastF64I64Op)","text":"Bitcast from a 64-bit floating-point value to a 64-bit integer
Syntax:
operation ::= `vm.bitcast.f64.i64` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_33","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_19","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmbitcasti32f32-vmbitcasti32f32op","title":"vm.bitcast.i32.f32
(VM::BitcastI32F32Op)","text":"Bitcast from a 32-bit integer to a 32-bit floating-point value
Syntax:
operation ::= `vm.bitcast.i32.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_34","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_20","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmbitcasti64f64-vmbitcasti64f64op","title":"vm.bitcast.i64.f64
(VM::BitcastI64F64Op)","text":"Bitcast from a 64-bit integer to a 64-bit floating-point value
Syntax:
operation ::= `vm.bitcast.i64.f64` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_35","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_21","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastanyref-vmcastanyrefop","title":"vm.cast.any.ref
(VM::CastAnyRefOp)","text":"Casts from any ref to a specific ref type
Syntax:
operation ::= `vm.cast.any.ref` $operand attr-dict `:` type($operand) `->` type($result)\n
Performs a runtime cast of an opaque !vm.ref<?>
to a specific !vm.ref<T>
and raises an error if the operand does not match the expected type. Null refs can always be cast between types.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_36","title":"Operands:","text":"Operand Description operand
ref"},{"location":"reference/mlir-dialects/VM/#results_22","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmcastf32si32-vmcastf32si32op","title":"vm.cast.f32.si32
(VM::CastF32SI32Op)","text":"Cast from a floating-point value to a signed integer
Syntax:
operation ::= `vm.cast.f32.si32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_37","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_23","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastf32ui32-vmcastf32ui32op","title":"vm.cast.f32.ui32
(VM::CastF32UI32Op)","text":"Cast from a floating-point value to an unsigned integer
Syntax:
operation ::= `vm.cast.f32.ui32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_38","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_24","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcastrefany-vmcastrefanyop","title":"vm.cast.ref.any
(VM::CastRefAnyOp)","text":"Casts from a specific ref to any ref type
Syntax:
operation ::= `vm.cast.ref.any` $operand attr-dict `:` type($operand) `->` type($result)\n
Performs a compile-time widening cast of a specific !vm.ref<T>
to an opaque !vm.ref<?>
.
Traits: AlwaysSpeculatableImplTrait, VM_AssignmentOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_39","title":"Operands:","text":"Operand Description operand
ref"},{"location":"reference/mlir-dialects/VM/#results_25","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmcastsi32f32-vmcastsi32f32op","title":"vm.cast.si32.f32
(VM::CastSI32F32Op)","text":"Cast from a signed integer to a floating-point value
Syntax:
operation ::= `vm.cast.si32.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_40","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_26","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcastui32f32-vmcastui32f32op","title":"vm.cast.ui32.f32
(VM::CastUI32F32Op)","text":"Cast from an unsigned integer to a floating-point value
Syntax:
operation ::= `vm.cast.ui32.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_41","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_27","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmextf32f64-vmextf32f64op","title":"vm.ext.f32.f64
(VM::ExtF32F64Op)","text":"Floating-point zero extend 32 bits to 64 bits
Syntax:
operation ::= `vm.ext.f32.f64` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_42","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_28","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexti16i32s-vmexti16i32sop","title":"vm.ext.i16.i32.s
(VM::ExtI16I32SOp)","text":"Integer sign extend 16 bits to 32 bits
Syntax:
operation ::= `vm.ext.i16.i32.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_43","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_29","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i32u-vmexti16i32uop","title":"vm.ext.i16.i32.u
(VM::ExtI16I32UOp)","text":"Integer zero extend 16 bits to 32 bits
Syntax:
operation ::= `vm.ext.i16.i32.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_44","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_30","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64s-vmexti16i64sop","title":"vm.ext.i16.i64.s
(VM::ExtI16I64SOp)","text":"Integer sign extend 16 bits to 64 bits
Syntax:
operation ::= `vm.ext.i16.i64.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_45","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_31","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti16i64u-vmexti16i64uop","title":"vm.ext.i16.i64.u
(VM::ExtI16I64UOp)","text":"Integer zero extend 16 bits to 64 bits
Syntax:
operation ::= `vm.ext.i16.i64.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_46","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_32","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64s-vmexti32i64sop","title":"vm.ext.i32.i64.s
(VM::ExtI32I64SOp)","text":"Integer sign extend 32 bits to 64 bits
Syntax:
operation ::= `vm.ext.i32.i64.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_47","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_33","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti32i64u-vmexti32i64uop","title":"vm.ext.i32.i64.u
(VM::ExtI32I64UOp)","text":"Integer zero extend 32 bits to 64 bits
Syntax:
operation ::= `vm.ext.i32.i64.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_48","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_34","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32s-vmexti8i32sop","title":"vm.ext.i8.i32.s
(VM::ExtI8I32SOp)","text":"Integer sign extend 8 bits to 32 bits
Syntax:
operation ::= `vm.ext.i8.i32.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_49","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_35","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i32u-vmexti8i32uop","title":"vm.ext.i8.i32.u
(VM::ExtI8I32UOp)","text":"Integer zero extend 8 bits to 32 bits
Syntax:
operation ::= `vm.ext.i8.i32.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_50","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_36","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64s-vmexti8i64sop","title":"vm.ext.i8.i64.s
(VM::ExtI8I64SOp)","text":"Integer sign extend 8 bits to 64 bits
Syntax:
operation ::= `vm.ext.i8.i64.s` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_51","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_37","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmexti8i64u-vmexti8i64uop","title":"vm.ext.i8.i64.u
(VM::ExtI8I64UOp)","text":"Integer zero extend 8 bits to 64 bits
Syntax:
operation ::= `vm.ext.i8.i64.u` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_52","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_38","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtruncf64f32-vmtruncf64f32op","title":"vm.trunc.f64.f32
(VM::TruncF64F32Op)","text":"Floating-point truncate to 32 bits
Syntax:
operation ::= `vm.trunc.f64.f32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_53","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_39","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtrunci16i8-vmtrunci16i8op","title":"vm.trunc.i16.i8
(VM::TruncI16I8Op)","text":"Integer truncate to 8 bits
Syntax:
operation ::= `vm.trunc.i16.i8` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_54","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_40","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i16-vmtrunci32i16op","title":"vm.trunc.i32.i16
(VM::TruncI32I16Op)","text":"Integer truncate to 16 bits
Syntax:
operation ::= `vm.trunc.i32.i16` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_55","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_41","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci32i8-vmtrunci32i8op","title":"vm.trunc.i32.i8
(VM::TruncI32I8Op)","text":"Integer truncate to 8 bits
Syntax:
operation ::= `vm.trunc.i32.i8` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_56","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_42","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i16-vmtrunci64i16op","title":"vm.trunc.i64.i16
(VM::TruncI64I16Op)","text":"Integer truncate to 16 bits
Syntax:
operation ::= `vm.trunc.i64.i16` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_57","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_43","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i32-vmtrunci64i32op","title":"vm.trunc.i64.i32
(VM::TruncI64I32Op)","text":"Integer truncate to 32 bits
Syntax:
operation ::= `vm.trunc.i64.i32` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_58","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_44","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmtrunci64i8-vmtrunci64i8op","title":"vm.trunc.i64.i8
(VM::TruncI64I8Op)","text":"Integer truncate to 8 bits
Syntax:
operation ::= `vm.trunc.i64.i8` $operand attr-dict `:` type($operand) `->` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_59","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_45","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#comparison-ops","title":"Comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqi32-vmcmpeqi32op","title":"vm.cmp.eq.i32
(VM::CmpEQI32Op)","text":"Integer equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.i32` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_60","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_46","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqi64-vmcmpeqi64op","title":"vm.cmp.eq.i64
(VM::CmpEQI64Op)","text":"Integer equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.i64` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_61","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_47","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32s-vmcmpgtei32sop","title":"vm.cmp.gte.i32.s
(VM::CmpGTEI32SOp)","text":"Signed integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_62","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_48","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei32u-vmcmpgtei32uop","title":"vm.cmp.gte.i32.u
(VM::CmpGTEI32UOp)","text":"Unsigned integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_63","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_49","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64s-vmcmpgtei64sop","title":"vm.cmp.gte.i64.s
(VM::CmpGTEI64SOp)","text":"Signed integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_64","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_50","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtei64u-vmcmpgtei64uop","title":"vm.cmp.gte.i64.u
(VM::CmpGTEI64UOp)","text":"Unsigned integer greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_65","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_51","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32s-vmcmpgti32sop","title":"vm.cmp.gt.i32.s
(VM::CmpGTI32SOp)","text":"Signed integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_66","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_52","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti32u-vmcmpgti32uop","title":"vm.cmp.gt.i32.u
(VM::CmpGTI32UOp)","text":"Unsigned integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_67","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_53","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64s-vmcmpgti64sop","title":"vm.cmp.gt.i64.s
(VM::CmpGTI64SOp)","text":"Signed integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_68","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_54","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgti64u-vmcmpgti64uop","title":"vm.cmp.gt.i64.u
(VM::CmpGTI64UOp)","text":"Unsigned integer greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_69","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_55","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32s-vmcmpltei32sop","title":"vm.cmp.lte.i32.s
(VM::CmpLTEI32SOp)","text":"Signed integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_70","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_56","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei32u-vmcmpltei32uop","title":"vm.cmp.lte.i32.u
(VM::CmpLTEI32UOp)","text":"Unsigned integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_71","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_57","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64s-vmcmpltei64sop","title":"vm.cmp.lte.i64.s
(VM::CmpLTEI64SOp)","text":"Signed integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_72","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_58","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltei64u-vmcmpltei64uop","title":"vm.cmp.lte.i64.u
(VM::CmpLTEI64UOp)","text":"Unsigned integer less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_73","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_59","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32s-vmcmplti32sop","title":"vm.cmp.lt.i32.s
(VM::CmpLTI32SOp)","text":"Signed integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i32.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_74","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_60","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti32u-vmcmplti32uop","title":"vm.cmp.lt.i32.u
(VM::CmpLTI32UOp)","text":"Unsigned integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_75","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_61","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64s-vmcmplti64sop","title":"vm.cmp.lt.i64.s
(VM::CmpLTI64SOp)","text":"Signed integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i64.s` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_76","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_62","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmplti64u-vmcmplti64uop","title":"vm.cmp.lt.i64.u
(VM::CmpLTI64UOp)","text":"Unsigned integer less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.i64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_77","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_63","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei32-vmcmpnei32op","title":"vm.cmp.ne.i32
(VM::CmpNEI32Op)","text":"Integer inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.i32` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_78","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_64","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnei64-vmcmpnei64op","title":"vm.cmp.ne.i64
(VM::CmpNEI64Op)","text":"Integer inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.i64` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_79","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_65","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi32-vmcmpnzi32op","title":"vm.cmp.nz.i32
(VM::CmpNZI32Op)","text":"Integer non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.i32` $operand attr-dict `:` type($operand)\n
Compares the given integer operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_80","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_66","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzi64-vmcmpnzi64op","title":"vm.cmp.nz.i64
(VM::CmpNZI64Op)","text":"Integer non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.i64` $operand attr-dict `:` type($operand)\n
Compares the given integer operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_81","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_67","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#conditional-assignment-ops","title":"Conditional assignment ops","text":""},{"location":"reference/mlir-dialects/VM/#vmselectf32-vmselectf32op","title":"vm.select.f32
(VM::SelectF32Op)","text":"Floating-point select operation
Syntax:
operation ::= `vm.select.f32` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_82","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
32-bit float false_value
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_68","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmselectf64-vmselectf64op","title":"vm.select.f64
(VM::SelectF64Op)","text":"Floating-point select operation
Syntax:
operation ::= `vm.select.f64` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_83","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
64-bit float false_value
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_69","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmselecti32-vmselecti32op","title":"vm.select.i32
(VM::SelectI32Op)","text":"Integer select operation
Syntax:
operation ::= `vm.select.i32` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_84","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
32-bit signless integer false_value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_70","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselecti64-vmselecti64op","title":"vm.select.i64
(VM::SelectI64Op)","text":"Integer select operation
Syntax:
operation ::= `vm.select.i64` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen. The true and false values must have the same types. For example, the maximum operation is obtained by combining \"select\" with \"cmpi\" as follows:
%2 = vm.cmp.gt.i32.s %0, %1 : i32\n%3 = vm.select.i32 %2, %0, %1 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_85","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
64-bit signless integer false_value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_71","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmselectref-vmselectrefop","title":"vm.select.ref
(VM::SelectRefOp)","text":"Ref select operation
Syntax:
operation ::= `vm.select.ref` operands attr-dict `:` type($result)\n
Chooses one value based on a binary condition supplied as its first operand. If the value of the condition is true the true_value
operand is chosen, otherwise the false_value
operand is chosen.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_86","title":"Operands:","text":"Operand Description condition
32-bit signless integer true_value
ref false_value
ref"},{"location":"reference/mlir-dialects/VM/#results_72","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmswitchf32-vmswitchf32op","title":"vm.switch.f32
(VM::SwitchF32Op)","text":"Floating-point switch operation
Syntax:
operation ::= `vm.switch.f32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.f32 %index[%c100, %c200, %c300] else %c5 : f32\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_87","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
32-bit float values
variadic of 32-bit float"},{"location":"reference/mlir-dialects/VM/#results_73","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchf64-vmswitchf64op","title":"vm.switch.f64
(VM::SwitchF64Op)","text":"Floating-point switch operation
Syntax:
operation ::= `vm.switch.f64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.f64 %index[%c100, %c200, %c300] else %c5 : f64\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_88","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
64-bit float values
variadic of 64-bit float"},{"location":"reference/mlir-dialects/VM/#results_74","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmswitchi32-vmswitchi32op","title":"vm.switch.i32
(VM::SwitchI32Op)","text":"Integer switch operation
Syntax:
operation ::= `vm.switch.i32` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.i32 %index[%c100, %c200, %c300] else %c5 : i32\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_89","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
32-bit signless integer values
variadic of 32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_75","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchi64-vmswitchi64op","title":"vm.switch.i64
(VM::SwitchI64Op)","text":"Integer switch operation
Syntax:
operation ::= `vm.switch.i64` $index `[` $values `]` `else` $default_value attr-dict `:` type($result)\n
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %c100/%c200/%c300 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %c5.\n%0 = vm.switch.i64 %index[%c100, %c200, %c300] else %c5 : i64\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_90","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
64-bit signless integer values
variadic of 64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_76","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmswitchref-vmswitchrefop","title":"vm.switch.ref
(VM::SwitchRefOp)","text":"Ref switch operation
Returns the value with the given index
in values
or default_value
if the index is out of bounds.
// Switch %index to cases of %r0/%r1/%r2 if index==0, ==1, ==2.\n// If %index is out of range (<0 or >2) then default to %null.\n%0 = vm.switch.ref %index[%r0, %r1, %r2] else %null : vm.ref<!foo>\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_91","title":"Operands:","text":"Operand Description index
32-bit signless integer default_value
ref values
variadic of ref"},{"location":"reference/mlir-dialects/VM/#results_77","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#constant-ops","title":"Constant ops","text":""},{"location":"reference/mlir-dialects/VM/#vmconstf32-vmconstf32op","title":"vm.const.f32
(VM::ConstF32Op)","text":"32-bit floating-point constant operation
Syntax:
operation ::= `vm.const.f32` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription value
FloatAttr32-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_78","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf32zero-vmconstf32zeroop","title":"vm.const.f32.zero
(VM::ConstF32ZeroOp)","text":"32-bit floating-point constant zero operation
Syntax:
operation ::= `vm.const.f32.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_79","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64-vmconstf64op","title":"vm.const.f64
(VM::ConstF64Op)","text":"64-bit floating-point constant operation
Syntax:
operation ::= `vm.const.f64` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription value
FloatAttr64-bit floating-point value"},{"location":"reference/mlir-dialects/VM/#results_80","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconstf64zero-vmconstf64zeroop","title":"vm.const.f64.zero
(VM::ConstF64ZeroOp)","text":"64-bit floating-point constant zero operation
Syntax:
operation ::= `vm.const.f64.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_81","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmconsti32-vmconsti32op","title":"vm.const.i32
(VM::ConstI32Op)","text":"32-bit integer constant operation
Syntax:
operation ::= `vm.const.i32` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription value
IntegerAttr32-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_82","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti32zero-vmconsti32zeroop","title":"vm.const.i32.zero
(VM::ConstI32ZeroOp)","text":"32-bit integer constant zero operation
Syntax:
operation ::= `vm.const.i32.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_83","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64-vmconsti64op","title":"vm.const.i64
(VM::ConstI64Op)","text":"64-bit integer constant operation
Syntax:
operation ::= `vm.const.i64` $value attr-dict\n
Defines a constant value that is treated as a scalar literal at runtime.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription value
IntegerAttr64-bit integer value"},{"location":"reference/mlir-dialects/VM/#results_84","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconsti64zero-vmconsti64zeroop","title":"vm.const.i64.zero
(VM::ConstI64ZeroOp)","text":"64-bit integer constant zero operation
Syntax:
operation ::= `vm.const.i64.zero` attr-dict\n
Defines a constant zero primitive.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_85","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmconstrefrodata-vmconstrefrodataop","title":"vm.const.ref.rodata
(VM::ConstRefRodataOp)","text":"Constant rodata access operation
Syntax:
operation ::= `vm.const.ref.rodata` $rodata attr-dict `:` type($value)\n
Returns a reference to a read-only buffer.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_4","title":"Attributes:","text":"AttributeMLIR TypeDescription rodata
::mlir::FlatSymbolRefAttrflat symbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_86","title":"Results:","text":"Result Description value
ref"},{"location":"reference/mlir-dialects/VM/#vmconstrefzero-vmconstrefzeroop","title":"vm.const.ref.zero
(VM::ConstRefZeroOp)","text":"Null ref constant operation
Syntax:
operation ::= `vm.const.ref.zero` `:` type($result) attr-dict\n
Defines a constant null ref that can be used in comparisons and initialization.
Traits: AlwaysSpeculatableImplTrait, ConstantLike
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#results_87","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmrodatainline-vmrodatainlineop","title":"vm.rodata.inline
(VM::RodataInlineOp)","text":"Inlined constant rodata
Syntax:
operation ::= `vm.rodata.inline` ($name^)? attr-dict `:` type($result) `=` $value\n
vm.rodata that can be embedded inline in functions. See vm.rodata for more information.
Traits: AlwaysSpeculatableImplTrait, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_5","title":"Attributes:","text":"AttributeMLIR TypeDescription name
::mlir::StringAttrstring attribute value
::mlir::Attributebuffer-like constant attribute values alignment
::mlir::IntegerAttr64-bit signless integer attribute mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#results_88","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmrodata-vmrodataop","title":"vm.rodata
(VM::RodataOp)","text":"Read-only data definition operation
Syntax:
operation ::= `vm.rodata` custom<SymbolVisibility>($sym_visibility) $sym_name attr-dict $value\n
Defines a blob of read-only constant data that can be represented as a ref. This can be used to store arbitrary data within modules such as large constant buffers and other file contents.
Note that the data is reference counted as a way to track its usage once the value leaves the module. For example, returning rodata from an exported function must keep the data (possibly backed by mmap) valid for its entire lifetime.
By default all rodata will be aligned in the final module output at a 16-byte granularity. An optional alignment can be specified to override the default for cases where larger or smaller alignments are needed.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_6","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute value
::mlir::Attributebuffer-like constant attribute values alignment
::mlir::IntegerAttr64-bit signless integer attribute ordinal
::mlir::IntegerAttrordinal value mime_type
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#control-flow-ops","title":"Control flow ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbr-vmbranchop","title":"vm.br
(VM::BranchOp)","text":"Unconditional branch operation
Syntax:
operation ::= `vm.br` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n
Represents an unconditional branch operation that branches to a target block with the given set of arguments.
^bb0(...): vm.br ^bb1(%a) ^bb1(%blockArg1): ...
Traits: Terminator
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_92","title":"Operands:","text":"Operand Description destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_1","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#vmbr_table-vmbranchtableop","title":"vm.br_table
(VM::BranchTableOp)","text":"Branch table operation
Syntax:
operation ::= `vm.br_table` $index ` ` `{` `\\n`\n custom<BranchTableCases>(\n $defaultDestination, $defaultOperands, type($defaultOperands),\n $caseDestinations, $caseOperands, type($caseOperands))\n `}`\n attr-dict\n
Represents a branch table instructing execution to branch to the block with the specified index. If the index is out of bounds then execution will branch to the default block.
vm.br_table %index { default: ^bb1(%a : i64), 0: ^bb2, 1: ^bb3(%c : i64) }
Traits: AlwaysSpeculatableImplTrait, AttrSizedOperandSegments, Terminator
Interfaces: BranchOpInterface, ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_7","title":"Attributes:","text":"AttributeMLIR TypeDescription case_operand_segments
::mlir::DenseI32ArrayAttri32 dense array attribute"},{"location":"reference/mlir-dialects/VM/#operands_93","title":"Operands:","text":"Operand Description index
32-bit signless integer defaultOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref caseOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_2","title":"Successors:","text":"Successor Description defaultDestination
any successor caseDestinations
any successor"},{"location":"reference/mlir-dialects/VM/#vmcall-vmcallop","title":"vm.call
(VM::CallOp)","text":"Call operation
Syntax:
operation ::= `vm.call` $callee `(` operands `)` attr-dict `:` functional-type(operands, results)\n
Calls an internal VM function with the given arguments.
Interfaces: CallOpInterface, MemoryEffectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_8","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_94","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_89","title":"Results:","text":"Result Description results
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcallvariadic-vmcallvariadicop","title":"vm.call.variadic
(VM::CallVariadicOp)","text":"Call operation with variadic arguments
Calls an internal VM function with the given arguments. One or more of the arguments may be variadic, encoded as segmented sized operand lists.
Variadic arguments must be specified with a total count in the segment_sizes attribute.
Interfaces: CallOpInterface, MemoryEffectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_9","title":"Attributes:","text":"AttributeMLIR TypeDescription callee
FlatSymbolRefAttrsymbol reference attribute segment_sizes
::mlir::DenseIntElementsAttr16-bit signless integer elements attribute segment_types
::mlir::ArrayAttrtype array attribute"},{"location":"reference/mlir-dialects/VM/#operands_95","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#results_90","title":"Results:","text":"Result Description results
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckeq-vmcheckeqop","title":"vm.check.eq
(VM::CheckEQOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: Commutative, VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_10","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_96","title":"Operands:","text":"Operand Description lhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref rhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcheckne-vmcheckneop","title":"vm.check.ne
(VM::CheckNEOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.ne` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: Commutative, VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_11","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_97","title":"Operands:","text":"Operand Description lhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref rhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknz-vmchecknzop","title":"vm.check.nz
(VM::CheckNZOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.nz` $value (`,` $message^)? attr-dict `:` type($value)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_12","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_98","title":"Operands:","text":"Operand Description value
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmchecknearly_eq-vmchecknearlyeqop","title":"vm.check.nearly_eq
(VM::CheckNearlyEQOp)","text":"Raises a global failure if the condition is true
Syntax:
operation ::= `vm.check.nearly_eq` $lhs `,` $rhs (`,` $message^)? attr-dict `:` type($lhs)\n
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
This is implemented as a pseudo-op that transforms into a vm.cond_fail operation.
vm.check.eq %a, %b, \"a == b\" : i32\nvm.check.nz %ref, \"!null\" : !vm.ref<?>\n
Traits: Commutative, VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_13","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_99","title":"Operands:","text":"Operand Description lhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref rhs
32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmcond_br-vmcondbranchop","title":"vm.cond_br
(VM::CondBranchOp)","text":"Conditional branch operation
Syntax:
operation ::= `vm.cond_br` $condition `,`\n $trueDest (`(` $trueDestOperands^ `:` type($trueDestOperands) `)`)? `,`\n $falseDest (`(` $falseDestOperands^ `:` type($falseDestOperands) `)`)?\n attr-dict\n
Represents a conditional branch operation that branches to one of the two target blocks with the given set of arguments.
^bb0(...): vm.cond_br %condition, ^bb1(%a), ^bb2(%b) ^bb1(%blockArg1): ... ^bb2(%blockArg2): ...
Traits: AttrSizedOperandSegments, Terminator
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_100","title":"Operands:","text":"Operand Description condition
32-bit signless integer trueDestOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref falseDestOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_3","title":"Successors:","text":"Successor Description trueDest
any successor falseDest
any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_fail-vmcondfailop","title":"vm.cond_fail
(VM::CondFailOp)","text":"Raises a global failure if the condition is true
When the condition is true this signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. It's also possible that other work that is in-flight or pending when the failure occurs will complete.
This is implemented as a pseudo-op that transforms into a vm.fail operation guarded by the condition.
%nz = vm.cmp.nz.i32 %value : i32\n%statusCode = vm.const.i32 9\nvm.cond_fail %nz, %statusCode, \"expected non-zero\"\n
Traits: VM_PseudoOp
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_14","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_101","title":"Operands:","text":"Operand Description condition
32-bit signless integer status
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfail-vmfailop","title":"vm.fail
(VM::FailOp)","text":"Raises a global failure
Syntax:
operation ::= `vm.fail` $status (`,` $message^)? attr-dict\n
Signals a runtime failure that causes the entire active invocation - and possibly all in-flight and pending invocations - to fail with the given status. The status will be propagated back via the available runtime error handling mechanisms such as semaphores or synchronous invocation results.
As the IREE execution model is deeply pipelined it's possible that failures have a latency between when they are emitted and when the application can observe the failure. It's also possible that other work that is in-flight or pending when the failure occurs will complete.
%statusCode = vm.const.i32 9\nvm.fail %statusCode, \"oh no!\"\n
Traits: Terminator
Interfaces: VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_15","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_102","title":"Operands:","text":"Operand Description status
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmimportresolved-vmimportresolvedop","title":"vm.import.resolved
(VM::ImportResolvedOp)","text":"Returns true if an optional import was resolved at runtime
Syntax:
operation ::= `vm.import.resolved` $import attr-dict `:` type($result)\n
Allows for checking whether an optional import was resolved at runtime. If this returns false then attempting to call the imported function will result in a failure at runtime.
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_16","title":"Attributes:","text":"AttributeMLIR TypeDescription import
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_91","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmreturn-vmreturnop","title":"vm.return
(VM::ReturnOp)","text":"Return operation
Syntax:
operation ::= `vm.return` attr-dict ($operands^ `:` type($operands))?\n
Represents a return operation within a function.
vm.func @foo(%0: i32, %1: f8) -> (i32, f8) {\n vm.return %0, %1 : i32, f8\n}\n
Traits: AlwaysSpeculatableImplTrait, ReturnLike, Terminator
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), RegionBranchTerminatorOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_103","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#debugging-ops","title":"Debugging ops","text":""},{"location":"reference/mlir-dialects/VM/#vmbreak-vmbreakop","title":"vm.break
(VM::BreakOp)","text":"Unconditional debug break operation
Syntax:
operation ::= `vm.break` $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict\n
Breaks into the attached debugger or asks for attaching a debugger. After resuming (or if a debugger is not attached) execution will continue at the target block.
Traits: Terminator, Util_YieldPoint, VM_DebugOnly, VM_FullBarrier
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_104","title":"Operands:","text":"Operand Description destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_4","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#vmcond_break-vmcondbreakop","title":"vm.cond_break
(VM::CondBreakOp)","text":"Conditional debug break operation
Syntax:
operation ::= `vm.cond_break` $condition `,` $dest (`(` $destOperands^ `:` type($destOperands) `)`)?\n attr-dict\n
Breaks into the attached debugger or asks for attaching a debugger if the provided condition is true. After resuming (or if a debugger is not attached) execution will continue at the target block.
Traits: Terminator, Util_YieldPoint, VM_DebugOnly, VM_FullBarrier
Interfaces: BranchOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_105","title":"Operands:","text":"Operand Description condition
32-bit signless integer destOperands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#successors_5","title":"Successors:","text":"Successor Description dest
any successor"},{"location":"reference/mlir-dialects/VM/#vmprint-vmprintop","title":"vm.print
(VM::PrintOp)","text":"Message printing operation
Syntax:
operation ::= `vm.print` $message `(` operands `)` attr-dict `:` type(operands)\n
Prints the given string message and zero or more values.
Traits: VM_DebugOnly, VM_FullBarrier
Interfaces: VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_17","title":"Attributes:","text":"AttributeMLIR TypeDescription message
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_106","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#vmtrace-vmtraceop","title":"vm.trace
(VM::TraceOp)","text":"Trace value(s) operation
Syntax:
operation ::= `vm.trace` $event_name `(` operands `)` attr-dict `:` type(operands)\n
Traces one or more values at the time the operation is executed. These values will be encoded into the active trace depending on the active trace verbosity setting.
Traits: VM_DebugOnly, VM_FullBarrier
Interfaces: VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_18","title":"Attributes:","text":"AttributeMLIR TypeDescription event_name
::mlir::StringAttrstring attribute"},{"location":"reference/mlir-dialects/VM/#operands_107","title":"Operands:","text":"Operand Description operands
variadic of 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float or 32-bit signless integer or ref"},{"location":"reference/mlir-dialects/VM/#floating-point-arithmetic-ops","title":"Floating-point arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsf32-vmabsf32op","title":"vm.abs.f32
(VM::AbsF32Op)","text":"Floating point absolute-value operation
Syntax:
operation ::= `vm.abs.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_108","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_92","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmabsf64-vmabsf64op","title":"vm.abs.f64
(VM::AbsF64Op)","text":"Floating point absolute-value operation
Syntax:
operation ::= `vm.abs.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_109","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_93","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf32-vmaddf32op","title":"vm.add.f32
(VM::AddF32Op)","text":"Floating-point add operation
Syntax:
operation ::= `vm.add.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_110","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_94","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmaddf64-vmaddf64op","title":"vm.add.f64
(VM::AddF64Op)","text":"Floating-point add operation
Syntax:
operation ::= `vm.add.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_111","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_95","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf32-vmceilf32op","title":"vm.ceil.f32
(VM::CeilF32Op)","text":"Floating point ceiling operation
Syntax:
operation ::= `vm.ceil.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_112","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_96","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmceilf64-vmceilf64op","title":"vm.ceil.f64
(VM::CeilF64Op)","text":"Floating point ceiling operation
Syntax:
operation ::= `vm.ceil.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_113","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_97","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf32-vmdivf32op","title":"vm.div.f32
(VM::DivF32Op)","text":"Floating point division operation
Syntax:
operation ::= `vm.div.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_114","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_98","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmdivf64-vmdivf64op","title":"vm.div.f64
(VM::DivF64Op)","text":"Floating point division operation
Syntax:
operation ::= `vm.div.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_115","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_99","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf32-vmfmaf32op","title":"vm.fma.f32
(VM::FMAF32Op)","text":"Floating point fused multiply-add operation (a*b+c)
Syntax:
operation ::= `vm.fma.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_116","title":"Operands:","text":"Operand Description a
32-bit float b
32-bit float c
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_100","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfmaf64-vmfmaf64op","title":"vm.fma.f64
(VM::FMAF64Op)","text":"Floating point fused multiply-add operation (a*b+c)
Syntax:
operation ::= `vm.fma.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_117","title":"Operands:","text":"Operand Description a
64-bit float b
64-bit float c
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_101","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf32-vmfloorf32op","title":"vm.floor.f32
(VM::FloorF32Op)","text":"Floating point floor operation
Syntax:
operation ::= `vm.floor.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_118","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_102","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmfloorf64-vmfloorf64op","title":"vm.floor.f64
(VM::FloorF64Op)","text":"Floating point floor operation
Syntax:
operation ::= `vm.floor.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_119","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_103","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf32-vmmaxf32op","title":"vm.max.f32
(VM::MaxF32Op)","text":"Floating point maximum operation
Syntax:
operation ::= `vm.max.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_120","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_104","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmmaxf64-vmmaxf64op","title":"vm.max.f64
(VM::MaxF64Op)","text":"Floating point maximum operation
Syntax:
operation ::= `vm.max.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_121","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_105","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf32-vmminf32op","title":"vm.min.f32
(VM::MinF32Op)","text":"Floating point minimum operation
Syntax:
operation ::= `vm.min.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_122","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_106","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmminf64-vmminf64op","title":"vm.min.f64
(VM::MinF64Op)","text":"Floating point minimum operation
Syntax:
operation ::= `vm.min.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_123","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_107","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmmulf32-vmmulf32op","title":"vm.mul.f32
(VM::MulF32Op)","text":"Floating point multiplication operation
Syntax:
operation ::= `vm.mul.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_124","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_108","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmmulf64-vmmulf64op","title":"vm.mul.f64
(VM::MulF64Op)","text":"Floating point multiplication operation
Syntax:
operation ::= `vm.mul.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_125","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_109","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf32-vmnegf32op","title":"vm.neg.f32
(VM::NegF32Op)","text":"Floating point negation operation
Syntax:
operation ::= `vm.neg.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_126","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_110","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmnegf64-vmnegf64op","title":"vm.neg.f64
(VM::NegF64Op)","text":"Floating point negation operation
Syntax:
operation ::= `vm.neg.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_127","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_111","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf32-vmremf32op","title":"vm.rem.f32
(VM::RemF32Op)","text":"Floating point remainder operation
Syntax:
operation ::= `vm.rem.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_128","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_112","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmremf64-vmremf64op","title":"vm.rem.f64
(VM::RemF64Op)","text":"Floating point remainder operation
Syntax:
operation ::= `vm.rem.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_129","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_113","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32even-vmroundf32evenop","title":"vm.round.f32.even
(VM::RoundF32EvenOp)","text":"Rounds the value to the nearest even integer
Syntax:
operation ::= `vm.round.f32.even` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_130","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_114","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf32-vmroundf32op","title":"vm.round.f32
(VM::RoundF32Op)","text":"Rounds the value to the nearest integer away from zero
Syntax:
operation ::= `vm.round.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_131","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_115","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64even-vmroundf64evenop","title":"vm.round.f64.even
(VM::RoundF64EvenOp)","text":"Rounds the value to the nearest even integer
Syntax:
operation ::= `vm.round.f64.even` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_132","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_116","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmroundf64-vmroundf64op","title":"vm.round.f64
(VM::RoundF64Op)","text":"Rounds the value to the nearest integer away from zero
Syntax:
operation ::= `vm.round.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_133","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_117","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsubf32-vmsubf32op","title":"vm.sub.f32
(VM::SubF32Op)","text":"Floating point subtraction operation
Syntax:
operation ::= `vm.sub.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_134","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_118","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsubf64-vmsubf64op","title":"vm.sub.f64
(VM::SubF64Op)","text":"Floating point subtraction operation
Syntax:
operation ::= `vm.sub.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_135","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_119","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#floating-point-comparison-ops","title":"Floating-point comparison ops","text":""},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32near-vmcmpeqf32nearop","title":"vm.cmp.eq.f32.near
(VM::CmpEQF32NearOp)","text":"Near floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f32.near` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_136","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_120","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32o-vmcmpeqf32oop","title":"vm.cmp.eq.f32.o
(VM::CmpEQF32OOp)","text":"Ordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_137","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_121","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf32u-vmcmpeqf32uop","title":"vm.cmp.eq.f32.u
(VM::CmpEQF32UOp)","text":"Unordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_138","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_122","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64near-vmcmpeqf64nearop","title":"vm.cmp.eq.f64.near
(VM::CmpEQF64NearOp)","text":"Near floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f64.near` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_139","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_123","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64o-vmcmpeqf64oop","title":"vm.cmp.eq.f64.o
(VM::CmpEQF64OOp)","text":"Ordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_140","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_124","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpeqf64u-vmcmpeqf64uop","title":"vm.cmp.eq.f64.u
(VM::CmpEQF64UOp)","text":"Unordered floating-point equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_141","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_125","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32o-vmcmpgtef32oop","title":"vm.cmp.gte.f32.o
(VM::CmpGTEF32OOp)","text":"Ordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_142","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_126","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef32u-vmcmpgtef32uop","title":"vm.cmp.gte.f32.u
(VM::CmpGTEF32UOp)","text":"Unordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_143","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_127","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64o-vmcmpgtef64oop","title":"vm.cmp.gte.f64.o
(VM::CmpGTEF64OOp)","text":"Ordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_144","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_128","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtef64u-vmcmpgtef64uop","title":"vm.cmp.gte.f64.u
(VM::CmpGTEF64UOp)","text":"Unordered floating-point greater-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.gte.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_145","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_129","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32o-vmcmpgtf32oop","title":"vm.cmp.gt.f32.o
(VM::CmpGTF32OOp)","text":"Ordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_146","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_130","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf32u-vmcmpgtf32uop","title":"vm.cmp.gt.f32.u
(VM::CmpGTF32UOp)","text":"Unordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_147","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_131","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64o-vmcmpgtf64oop","title":"vm.cmp.gt.f64.o
(VM::CmpGTF64OOp)","text":"Ordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_148","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_132","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpgtf64u-vmcmpgtf64uop","title":"vm.cmp.gt.f64.u
(VM::CmpGTF64UOp)","text":"Unordered floating-point greater-than comparison operation
Syntax:
operation ::= `vm.cmp.gt.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_149","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_133","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32o-vmcmpltef32oop","title":"vm.cmp.lte.f32.o
(VM::CmpLTEF32OOp)","text":"Ordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_150","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_134","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef32u-vmcmpltef32uop","title":"vm.cmp.lte.f32.u
(VM::CmpLTEF32UOp)","text":"Unordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_151","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_135","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64o-vmcmpltef64oop","title":"vm.cmp.lte.f64.o
(VM::CmpLTEF64OOp)","text":"Ordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_152","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_136","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltef64u-vmcmpltef64uop","title":"vm.cmp.lte.f64.u
(VM::CmpLTEF64UOp)","text":"Unordered floating-point less-than-or-equal comparison operation
Syntax:
operation ::= `vm.cmp.lte.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_153","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_137","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32o-vmcmpltf32oop","title":"vm.cmp.lt.f32.o
(VM::CmpLTF32OOp)","text":"Ordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_154","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_138","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf32u-vmcmpltf32uop","title":"vm.cmp.lt.f32.u
(VM::CmpLTF32UOp)","text":"Unordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_155","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_139","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64o-vmcmpltf64oop","title":"vm.cmp.lt.f64.o
(VM::CmpLTF64OOp)","text":"Ordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_156","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_140","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpltf64u-vmcmpltf64uop","title":"vm.cmp.lt.f64.u
(VM::CmpLTF64UOp)","text":"Unordered floating-point less-than comparison operation
Syntax:
operation ::= `vm.cmp.lt.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_157","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_141","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32o-vmcmpnef32oop","title":"vm.cmp.ne.f32.o
(VM::CmpNEF32OOp)","text":"Ordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f32.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_158","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_142","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef32u-vmcmpnef32uop","title":"vm.cmp.ne.f32.u
(VM::CmpNEF32UOp)","text":"Unordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f32.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_159","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_143","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64o-vmcmpnef64oop","title":"vm.cmp.ne.f64.o
(VM::CmpNEF64OOp)","text":"Ordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f64.o` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_160","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_144","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnef64u-vmcmpnef64uop","title":"vm.cmp.ne.f64.u
(VM::CmpNEF64UOp)","text":"Unordered floating-point inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.f64.u` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_161","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_145","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32o-vmcmpnzf32oop","title":"vm.cmp.nz.f32.o
(VM::CmpNZF32OOp)","text":"Ordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f32.o` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_162","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_146","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf32u-vmcmpnzf32uop","title":"vm.cmp.nz.f32.u
(VM::CmpNZF32UOp)","text":"Unordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f32.u` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_163","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_147","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64o-vmcmpnzf64oop","title":"vm.cmp.nz.f64.o
(VM::CmpNZF64OOp)","text":"Ordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f64.o` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_164","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_148","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzf64u-vmcmpnzf64uop","title":"vm.cmp.nz.f64.u
(VM::CmpNZF64UOp)","text":"Unordered floating-point non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.f64.u` operands attr-dict `:` type($operand)\n
Compares the given floating-point operand for a non-zero value.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64, VM_PseudoOp
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_165","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_149","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf32-vmcmpnanf32op","title":"vm.cmp.nan.f32
(VM::CmpNaNF32Op)","text":"Floating-point NaN comparison operation
Syntax:
operation ::= `vm.cmp.nan.f32` $operand attr-dict `:` type($operand)\n
Returns 1 if the value is NaN.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_166","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_150","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnanf64-vmcmpnanf64op","title":"vm.cmp.nan.f64
(VM::CmpNaNF64Op)","text":"Floating-point NaN comparison operation
Syntax:
operation ::= `vm.cmp.nan.f64` $operand attr-dict `:` type($operand)\n
Returns 1 if the value is NaN.
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_167","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_151","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#floating-point-math-ops","title":"Floating-point math ops","text":"These map directly to the math
dialect.
"},{"location":"reference/mlir-dialects/VM/#vmatan2f32-vmatan2f32op","title":"vm.atan2.f32
(VM::Atan2F32Op)","text":"2-argument arcus tangent of the given values
Syntax:
operation ::= `vm.atan2.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_168","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_152","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatan2f64-vmatan2f64op","title":"vm.atan2.f64
(VM::Atan2F64Op)","text":"2-argument arcus tangent of the given values
Syntax:
operation ::= `vm.atan2.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_169","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_153","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf32-vmatanf32op","title":"vm.atan.f32
(VM::AtanF32Op)","text":"Arcus tangent of the given value
Syntax:
operation ::= `vm.atan.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_170","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_154","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmatanf64-vmatanf64op","title":"vm.atan.f64
(VM::AtanF64Op)","text":"Arcus tangent of the given value
Syntax:
operation ::= `vm.atan.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_171","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_155","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf32-vmcosf32op","title":"vm.cos.f32
(VM::CosF32Op)","text":"Cosine of the specified value
Syntax:
operation ::= `vm.cos.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_172","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_156","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmcosf64-vmcosf64op","title":"vm.cos.f64
(VM::CosF64Op)","text":"Cosine of the specified value
Syntax:
operation ::= `vm.cos.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_173","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_157","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff32-vmerff32op","title":"vm.erf.f32
(VM::ErfF32Op)","text":"Computes the error function of the specified value
Syntax:
operation ::= `vm.erf.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_174","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_158","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmerff64-vmerff64op","title":"vm.erf.f64
(VM::ErfF64Op)","text":"Computes the error function of the specified value
Syntax:
operation ::= `vm.erf.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_175","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_159","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f32-vmexp2f32op","title":"vm.exp2.f32
(VM::Exp2F32Op)","text":"Base-2 exponential of the specified value
Syntax:
operation ::= `vm.exp2.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_176","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_160","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexp2f64-vmexp2f64op","title":"vm.exp2.f64
(VM::Exp2F64Op)","text":"Base-2 exponential of the specified value
Syntax:
operation ::= `vm.exp2.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_177","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_161","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf32-vmexpf32op","title":"vm.exp.f32
(VM::ExpF32Op)","text":"Base-e exponential of the specified value
Syntax:
operation ::= `vm.exp.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_178","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_162","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpf64-vmexpf64op","title":"vm.exp.f64
(VM::ExpF64Op)","text":"Base-e exponential of the specified value
Syntax:
operation ::= `vm.exp.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_179","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_163","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f32-vmexpm1f32op","title":"vm.expm1.f32
(VM::ExpM1F32Op)","text":"Base-e exponential of the specified value minus 1
Syntax:
operation ::= `vm.expm1.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_180","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_164","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmexpm1f64-vmexpm1f64op","title":"vm.expm1.f64
(VM::ExpM1F64Op)","text":"Base-e exponential of the specified value minus 1
Syntax:
operation ::= `vm.expm1.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_181","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_165","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f32-vmlog10f32op","title":"vm.log10.f32
(VM::Log10F32Op)","text":"Base-10 logarithm of the specified value
Syntax:
operation ::= `vm.log10.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_182","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_166","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog10f64-vmlog10f64op","title":"vm.log10.f64
(VM::Log10F64Op)","text":"Base-10 logarithm of the specified value
Syntax:
operation ::= `vm.log10.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_183","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_167","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf32-vmlog1pf32op","title":"vm.log1p.f32
(VM::Log1pF32Op)","text":"Natural logarithm of one plus the given value
Syntax:
operation ::= `vm.log1p.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_184","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_168","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog1pf64-vmlog1pf64op","title":"vm.log1p.f64
(VM::Log1pF64Op)","text":"Natural logarithm of one plus the given value
Syntax:
operation ::= `vm.log1p.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_185","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_169","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f32-vmlog2f32op","title":"vm.log2.f32
(VM::Log2F32Op)","text":"Base-2 logarithm of the specified value
Syntax:
operation ::= `vm.log2.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_186","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_170","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlog2f64-vmlog2f64op","title":"vm.log2.f64
(VM::Log2F64Op)","text":"Base-2 logarithm of the specified value
Syntax:
operation ::= `vm.log2.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_187","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_171","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf32-vmlogf32op","title":"vm.log.f32
(VM::LogF32Op)","text":"Base-e logarithm of the specified value
Syntax:
operation ::= `vm.log.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_188","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_172","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlogf64-vmlogf64op","title":"vm.log.f64
(VM::LogF64Op)","text":"Base-e logarithm of the specified value
Syntax:
operation ::= `vm.log.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_189","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_173","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf32-vmpowf32op","title":"vm.pow.f32
(VM::PowF32Op)","text":"Floating point raised to the power of operation
Syntax:
operation ::= `vm.pow.f32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_190","title":"Operands:","text":"Operand Description lhs
32-bit float rhs
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_174","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmpowf64-vmpowf64op","title":"vm.pow.f64
(VM::PowF64Op)","text":"Floating point raised to the power of operation
Syntax:
operation ::= `vm.pow.f64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_191","title":"Operands:","text":"Operand Description lhs
64-bit float rhs
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_175","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf32-vmrsqrtf32op","title":"vm.rsqrt.f32
(VM::RsqrtF32Op)","text":"Reciprocal of sqrt (1 / sqrt of the specified value)
Syntax:
operation ::= `vm.rsqrt.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_192","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_176","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmrsqrtf64-vmrsqrtf64op","title":"vm.rsqrt.f64
(VM::RsqrtF64Op)","text":"Reciprocal of sqrt (1 / sqrt of the specified value)
Syntax:
operation ::= `vm.rsqrt.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_193","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_177","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf32-vmsinf32op","title":"vm.sin.f32
(VM::SinF32Op)","text":"Sine of the specified value
Syntax:
operation ::= `vm.sin.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_194","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_178","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsinf64-vmsinf64op","title":"vm.sin.f64
(VM::SinF64Op)","text":"Sine of the specified value
Syntax:
operation ::= `vm.sin.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_195","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_179","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf32-vmsqrtf32op","title":"vm.sqrt.f32
(VM::SqrtF32Op)","text":"Sqrt of the specified value
Syntax:
operation ::= `vm.sqrt.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_196","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_180","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmsqrtf64-vmsqrtf64op","title":"vm.sqrt.f64
(VM::SqrtF64Op)","text":"Sqrt of the specified value
Syntax:
operation ::= `vm.sqrt.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_197","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_181","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf32-vmtanhf32op","title":"vm.tanh.f32
(VM::TanhF32Op)","text":"Hyperbolic tangent of the specified value
Syntax:
operation ::= `vm.tanh.f32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF32
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_198","title":"Operands:","text":"Operand Description operand
32-bit float"},{"location":"reference/mlir-dialects/VM/#results_182","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmtanhf64-vmtanhf64op","title":"vm.tanh.f64
(VM::TanhF64Op)","text":"Hyperbolic tangent of the specified value
Syntax:
operation ::= `vm.tanh.f64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, VM_ExtF64
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_199","title":"Operands:","text":"Operand Description operand
64-bit float"},{"location":"reference/mlir-dialects/VM/#results_183","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#global-ops","title":"Global ops","text":""},{"location":"reference/mlir-dialects/VM/#vmglobaladdress-vmglobaladdressop","title":"vm.global.address
(VM::GlobalAddressOp)","text":"Returns an address reference to a global
Syntax:
operation ::= `vm.global.address` $global attr-dict `:` type($result)\n
Returns an indirect address reference to the given global. During export the address will be converted to the natural format of the global table (for example, ordinals for refs and byte offsets for primitive types).
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), SymbolUserOpInterface, Util_GlobalAddressOpInterface, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#attributes_19","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_184","title":"Results:","text":"Result Description result
32-bit signless integer or a pointer-like reference"},{"location":"reference/mlir-dialects/VM/#vmglobalf32-vmglobalf32op","title":"vm.global.f32
(VM::GlobalF32Op)","text":"32-bit floating-point global declaration
Syntax:
operation ::= `vm.global.f32` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove, VM_ExtF32
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_20","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
FloatAttr32-bit floating-point value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalf64-vmglobalf64op","title":"vm.global.f64
(VM::GlobalF64Op)","text":"64-bit floating-point global declaration
Syntax:
operation ::= `vm.global.f64` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove, VM_ExtF64
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_21","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
FloatAttr64-bit floating-point value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali32-vmglobali32op","title":"vm.global.i32
(VM::GlobalI32Op)","text":"32-bit integer global declaration
Syntax:
operation ::= `vm.global.i32` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_22","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
IntegerAttr32-bit integer value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobali64-vmglobali64op","title":"vm.global.i64
(VM::GlobalI64Op)","text":"64-bit integer global declaration
Syntax:
operation ::= `vm.global.i64` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n custom<TypeOrAttr>($type, $initial_value)\n
Defines a global value that is treated as a scalar literal at runtime. Initialized to zero unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove
Interfaces: Symbol, Util_GlobalOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_23","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute initial_value
IntegerAttr64-bit integer value ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf32-vmgloballoadf32op","title":"vm.global.load.f32
(VM::GlobalLoadF32Op)","text":"Global 32-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.f32` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_24","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_185","title":"Results:","text":"Result Description value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadf64-vmgloballoadf64op","title":"vm.global.load.f64
(VM::GlobalLoadF64Op)","text":"Global 64-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.f64` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_25","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_186","title":"Results:","text":"Result Description value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi32-vmgloballoadi32op","title":"vm.global.load.i32
(VM::GlobalLoadI32Op)","text":"Global 32-bit integer load operation
Syntax:
operation ::= `vm.global.load.i32` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_26","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_187","title":"Results:","text":"Result Description value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadi64-vmgloballoadi64op","title":"vm.global.load.i64
(VM::GlobalLoadI64Op)","text":"Global 64-bit integer load operation
Syntax:
operation ::= `vm.global.load.i64` $global attr-dict `:` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_27","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_188","title":"Results:","text":"Result Description value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf32-vmgloballoadindirectf32op","title":"vm.global.load.indirect.f32
(VM::GlobalLoadIndirectF32Op)","text":"Global 32-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.indirect.f32` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF32
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_200","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<32-bit float>"},{"location":"reference/mlir-dialects/VM/#results_189","title":"Results:","text":"Result Description value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectf64-vmgloballoadindirectf64op","title":"vm.global.load.indirect.f64
(VM::GlobalLoadIndirectF64Op)","text":"Global 64-bit floating-point load operation
Syntax:
operation ::= `vm.global.load.indirect.f64` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Traits: VM_ExtF64
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_201","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<64-bit float>"},{"location":"reference/mlir-dialects/VM/#results_190","title":"Results:","text":"Result Description value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti32-vmgloballoadindirecti32op","title":"vm.global.load.indirect.i32
(VM::GlobalLoadIndirectI32Op)","text":"Global 32-bit integer load operation
Syntax:
operation ::= `vm.global.load.indirect.i32` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_202","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<32-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#results_191","title":"Results:","text":"Result Description value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirecti64-vmgloballoadindirecti64op","title":"vm.global.load.indirect.i64
(VM::GlobalLoadIndirectI64Op)","text":"Global 64-bit integer load operation
Syntax:
operation ::= `vm.global.load.indirect.i64` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a primitive value.
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_203","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr<64-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#results_192","title":"Results:","text":"Result Description value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmgloballoadindirectref-vmgloballoadindirectrefop","title":"vm.global.load.indirect.ref
(VM::GlobalLoadIndirectRefOp)","text":"Global ref load operation
Syntax:
operation ::= `vm.global.load.indirect.ref` $global attr-dict `:` type($global) `->` type($value)\n
Loads the value of a global containing a ref of the given type.
Interfaces: Util_GlobalLoadIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_204","title":"Operands:","text":"Operand Description global
32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#results_193","title":"Results:","text":"Result Description value
ref"},{"location":"reference/mlir-dialects/VM/#vmgloballoadref-vmgloballoadrefop","title":"vm.global.load.ref
(VM::GlobalLoadRefOp)","text":"Global ref load operation
Syntax:
operation ::= `vm.global.load.ref` $global attr-dict `:` type($value)\n
Loads the value of a global containing a ref of the given type.
Interfaces: MemoryEffectOpInterface, OpAsmOpInterface, SymbolUserOpInterface, Util_GlobalLoadOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_28","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#results_194","title":"Results:","text":"Result Description value
ref"},{"location":"reference/mlir-dialects/VM/#vmglobalref-vmglobalrefop","title":"vm.global.ref
(VM::GlobalRefOp)","text":"Ref global declaration
Syntax:
operation ::= `vm.global.ref` custom<SymbolVisibility>($sym_visibility)\n (`mutable` $is_mutable^)?\n $sym_name\n attr-dict\n `:` $type\n
Defines a global value that is a ref of a specific type. The global will retain the ref object for the lifetime of the context or until the value is replaced with a store or reset. Initialized to null unless an initial value is specified.
Traits: HasParent, IsolatedFromAbove
Interfaces: GlobalOpInterface, Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_29","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute type
::mlir::TypeAttrany type attribute is_mutable
::mlir::UnitAttrunit attribute ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref32-vmglobalstoref32op","title":"vm.global.store.f32
(VM::GlobalStoreF32Op)","text":"Global 32-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.f32` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Traits: VM_ExtF32
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_30","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_205","title":"Operands:","text":"Operand Description value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmglobalstoref64-vmglobalstoref64op","title":"vm.global.store.f64
(VM::GlobalStoreF64Op)","text":"Global 64-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.f64` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Traits: VM_ExtF64
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_31","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_206","title":"Operands:","text":"Operand Description value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei32-vmglobalstorei32op","title":"vm.global.store.i32
(VM::GlobalStoreI32Op)","text":"Global 32-bit integer store operation
Syntax:
operation ::= `vm.global.store.i32` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_32","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_207","title":"Operands:","text":"Operand Description value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstorei64-vmglobalstorei64op","title":"vm.global.store.i64
(VM::GlobalStoreI64Op)","text":"Global 64-bit integer store operation
Syntax:
operation ::= `vm.global.store.i64` $value `,` $global attr-dict `:` type($value)\n
Stores a primitive value to a global.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_33","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_208","title":"Operands:","text":"Operand Description value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf32-vmglobalstoreindirectf32op","title":"vm.global.store.indirect.f32
(VM::GlobalStoreIndirectF32Op)","text":"Global 32-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.indirect.f32` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Traits: VM_ExtF32
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_209","title":"Operands:","text":"Operand Description value
32-bit float global
32-bit signless integer or ptr<32-bit float>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectf64-vmglobalstoreindirectf64op","title":"vm.global.store.indirect.f64
(VM::GlobalStoreIndirectF64Op)","text":"Global 64-bit floating-point store operation
Syntax:
operation ::= `vm.global.store.indirect.f64` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Traits: VM_ExtF64
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_210","title":"Operands:","text":"Operand Description value
64-bit float global
32-bit signless integer or ptr<64-bit float>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti32-vmglobalstoreindirecti32op","title":"vm.global.store.indirect.i32
(VM::GlobalStoreIndirectI32Op)","text":"Global 32-bit integer store operation
Syntax:
operation ::= `vm.global.store.indirect.i32` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_211","title":"Operands:","text":"Operand Description value
32-bit signless integer global
32-bit signless integer or ptr<32-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirecti64-vmglobalstoreindirecti64op","title":"vm.global.store.indirect.i64
(VM::GlobalStoreIndirectI64Op)","text":"Global 64-bit integer store operation
Syntax:
operation ::= `vm.global.store.indirect.i64` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a primitive value to a global.
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_212","title":"Operands:","text":"Operand Description value
64-bit signless integer global
32-bit signless integer or ptr<64-bit signless integer>"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreindirectref-vmglobalstoreindirectrefop","title":"vm.global.store.indirect.ref
(VM::GlobalStoreIndirectRefOp)","text":"Global ref store operation
Syntax:
operation ::= `vm.global.store.indirect.ref` $value `,` $global attr-dict `:` type($value) `->` type($global)\n
Stores a ref to a global, retaining it until the global is reset.
Interfaces: Util_GlobalStoreIndirectOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#operands_213","title":"Operands:","text":"Operand Description value
ref global
32-bit signless integer or ptr"},{"location":"reference/mlir-dialects/VM/#vmglobalstoreref-vmglobalstorerefop","title":"vm.global.store.ref
(VM::GlobalStoreRefOp)","text":"Global ref store operation
Syntax:
operation ::= `vm.global.store.ref` $value `,` $global attr-dict `:` type($value)\n
Stores a ref to a global, retaining it until the global is reset.
Interfaces: SymbolUserOpInterface, Util_GlobalStoreOpInterface, VMSerializableOp, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_34","title":"Attributes:","text":"AttributeMLIR TypeDescription global
FlatSymbolRefAttrsymbol reference attribute"},{"location":"reference/mlir-dialects/VM/#operands_214","title":"Operands:","text":"Operand Description value
ref"},{"location":"reference/mlir-dialects/VM/#integer-arithmetic-ops","title":"Integer arithmetic ops","text":""},{"location":"reference/mlir-dialects/VM/#vmabsi32-vmabsi32op","title":"vm.abs.i32
(VM::AbsI32Op)","text":"Integer absolute-value operation
Syntax:
operation ::= `vm.abs.i32` $operand attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_215","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_195","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmabsi64-vmabsi64op","title":"vm.abs.i64
(VM::AbsI64Op)","text":"Integer absolute-value operation
Syntax:
operation ::= `vm.abs.i64` $operand attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_216","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_196","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi32-vmaddi32op","title":"vm.add.i32
(VM::AddI32Op)","text":"Integer add operation
Syntax:
operation ::= `vm.add.i32` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_217","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_197","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmaddi64-vmaddi64op","title":"vm.add.i64
(VM::AddI64Op)","text":"Integer add operation
Syntax:
operation ::= `vm.add.i64` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_218","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_198","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32s-vmdivi32sop","title":"vm.div.i32.s
(VM::DivI32SOp)","text":"Signed integer division operation
Syntax:
operation ::= `vm.div.i32.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_219","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_199","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi32u-vmdivi32uop","title":"vm.div.i32.u
(VM::DivI32UOp)","text":"Unsigned integer division operation
Syntax:
operation ::= `vm.div.i32.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_220","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_200","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64s-vmdivi64sop","title":"vm.div.i64.s
(VM::DivI64SOp)","text":"Signed integer division operation
Syntax:
operation ::= `vm.div.i64.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_221","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_201","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmdivi64u-vmdivi64uop","title":"vm.div.i64.u
(VM::DivI64UOp)","text":"Unsigned integer division operation
Syntax:
operation ::= `vm.div.i64.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_222","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_202","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfmai32-vmfmai32op","title":"vm.fma.i32
(VM::FMAI32Op)","text":"Integer fused-multiply add operation (a*b+c)
Syntax:
operation ::= `vm.fma.i32` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_223","title":"Operands:","text":"Operand Description a
32-bit signless integer b
32-bit signless integer c
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_203","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmfmai64-vmfmai64op","title":"vm.fma.i64
(VM::FMAI64Op)","text":"Integer fused-multiply add operation (a*b+c)
Syntax:
operation ::= `vm.fma.i64` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_224","title":"Operands:","text":"Operand Description a
64-bit signless integer b
64-bit signless integer c
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_204","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32s-vmmaxi32sop","title":"vm.max.i32.s
(VM::MaxI32SOp)","text":"Signed integer maximum operation
Syntax:
operation ::= `vm.max.i32.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_225","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_205","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi32u-vmmaxi32uop","title":"vm.max.i32.u
(VM::MaxI32UOp)","text":"Unsigned integer maximum operation
Syntax:
operation ::= `vm.max.i32.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_226","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_206","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64s-vmmaxi64sop","title":"vm.max.i64.s
(VM::MaxI64SOp)","text":"Signed integer maximum operation
Syntax:
operation ::= `vm.max.i64.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_227","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_207","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmaxi64u-vmmaxi64uop","title":"vm.max.i64.u
(VM::MaxI64UOp)","text":"Unsigned integer maximum operation
Syntax:
operation ::= `vm.max.i64.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_228","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_208","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32s-vmmini32sop","title":"vm.min.i32.s
(VM::MinI32SOp)","text":"Signed integer minimum operation
Syntax:
operation ::= `vm.min.i32.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_229","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_209","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini32u-vmmini32uop","title":"vm.min.i32.u
(VM::MinI32UOp)","text":"Unsigned integer minimum operation
Syntax:
operation ::= `vm.min.i32.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_230","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_210","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64s-vmmini64sop","title":"vm.min.i64.s
(VM::MinI64SOp)","text":"Signed integer minimum operation
Syntax:
operation ::= `vm.min.i64.s` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_231","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_211","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmini64u-vmmini64uop","title":"vm.min.i64.u
(VM::MinI64UOp)","text":"Unsigned integer minimum operation
Syntax:
operation ::= `vm.min.i64.u` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_232","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_212","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli32-vmmuli32op","title":"vm.mul.i32
(VM::MulI32Op)","text":"Integer multiplication operation
Syntax:
operation ::= `vm.mul.i32` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_233","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_213","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmmuli64-vmmuli64op","title":"vm.mul.i64
(VM::MulI64Op)","text":"Integer multiplication operation
Syntax:
operation ::= `vm.mul.i64` operands attr-dict `:` type($result)\n
Traits: Commutative
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_234","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_214","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32s-vmremi32sop","title":"vm.rem.i32.s
(VM::RemI32SOp)","text":"Signed integer division remainder operation
Syntax:
operation ::= `vm.rem.i32.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_235","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_215","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi32u-vmremi32uop","title":"vm.rem.i32.u
(VM::RemI32UOp)","text":"Unsigned integer division remainder operation
Syntax:
operation ::= `vm.rem.i32.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_236","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_216","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64s-vmremi64sop","title":"vm.rem.i64.s
(VM::RemI64SOp)","text":"Signed integer division remainder operation
Syntax:
operation ::= `vm.rem.i64.s` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_237","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_217","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmremi64u-vmremi64uop","title":"vm.rem.i64.u
(VM::RemI64UOp)","text":"Unsigned integer division remainder operation
Syntax:
operation ::= `vm.rem.i64.u` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_238","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_218","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi32-vmsubi32op","title":"vm.sub.i32
(VM::SubI32Op)","text":"Integer subtract operation
Syntax:
operation ::= `vm.sub.i32` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_239","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_219","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmsubi64-vmsubi64op","title":"vm.sub.i64
(VM::SubI64Op)","text":"Integer subtract operation
Syntax:
operation ::= `vm.sub.i64` operands attr-dict `:` type($result)\n
Interfaces: NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_240","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_220","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#integer-bit-manipulation-ops","title":"Integer bit manipulation ops","text":""},{"location":"reference/mlir-dialects/VM/#vmandi32-vmandi32op","title":"vm.and.i32
(VM::AndI32Op)","text":"Integer binary and operation
Syntax:
operation ::= `vm.and.i32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_241","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_221","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmandi64-vmandi64op","title":"vm.and.i64
(VM::AndI64Op)","text":"Integer binary and operation
Syntax:
operation ::= `vm.and.i64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_242","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_222","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi32-vmctlzi32op","title":"vm.ctlz.i32
(VM::CtlzI32Op)","text":"Counts the leading zeros in an integer value
Syntax:
operation ::= `vm.ctlz.i32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_243","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_223","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmctlzi64-vmctlzi64op","title":"vm.ctlz.i64
(VM::CtlzI64Op)","text":"Counts the leading zeros in an integer value
Syntax:
operation ::= `vm.ctlz.i64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_244","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_224","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti32-vmnoti32op","title":"vm.not.i32
(VM::NotI32Op)","text":"Integer binary not operation
Syntax:
operation ::= `vm.not.i32` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_245","title":"Operands:","text":"Operand Description operand
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_225","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmnoti64-vmnoti64op","title":"vm.not.i64
(VM::NotI64Op)","text":"Integer binary not operation
Syntax:
operation ::= `vm.not.i64` $operand attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_246","title":"Operands:","text":"Operand Description operand
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_226","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori32-vmori32op","title":"vm.or.i32
(VM::OrI32Op)","text":"Integer binary or operation
Syntax:
operation ::= `vm.or.i32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_247","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_227","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmori64-vmori64op","title":"vm.or.i64
(VM::OrI64Op)","text":"Integer binary or operation
Syntax:
operation ::= `vm.or.i64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_248","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_228","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmxori32-vmxori32op","title":"vm.xor.i32
(VM::XorI32Op)","text":"Integer binary exclusive-or operation
Syntax:
operation ::= `vm.xor.i32` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_249","title":"Operands:","text":"Operand Description lhs
32-bit signless integer rhs
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_229","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmxori64-vmxori64op","title":"vm.xor.i64
(VM::XorI64Op)","text":"Integer binary exclusive-or operation
Syntax:
operation ::= `vm.xor.i64` operands attr-dict `:` type($result)\n
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_250","title":"Operands:","text":"Operand Description lhs
64-bit signless integer rhs
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_230","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#list-ops","title":"List ops","text":""},{"location":"reference/mlir-dialects/VM/#vmlistalloc-vmlistallocop","title":"vm.list.alloc
(VM::ListAllocOp)","text":"Allocates a new empty list
Syntax:
operation ::= `vm.list.alloc` operands attr-dict `:` `(` type($initial_capacity) `)` `->` type($result)\n
Allocates a new typed list with a minimum initial_capacity.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_251","title":"Operands:","text":"Operand Description initial_capacity
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_231","title":"Results:","text":"Result Description result
list"},{"location":"reference/mlir-dialects/VM/#vmlistgetf32-vmlistgetf32op","title":"vm.list.get.f32
(VM::ListGetF32Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.f32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_252","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_232","title":"Results:","text":"Result Description result
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistgetf64-vmlistgetf64op","title":"vm.list.get.f64
(VM::ListGetF64Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.f64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_253","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_233","title":"Results:","text":"Result Description result
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistgeti32-vmlistgeti32op","title":"vm.list.get.i32
(VM::ListGetI32Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.i32` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_254","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_234","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgeti64-vmlistgeti64op","title":"vm.list.get.i64
(VM::ListGetI64Op)","text":"Primitive type element accessor
Syntax:
operation ::= `vm.list.get.i64` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the value of the element at the given index.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_255","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_235","title":"Results:","text":"Result Description result
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistgetref-vmlistgetrefop","title":"vm.list.get.ref
(VM::ListGetRefOp)","text":"Ref type element accessor
Syntax:
operation ::= `vm.list.get.ref` operands attr-dict `:` `(` type($list) `,` type($index) `)` `->` type($result)\n
Returns the ref value of the element at the given index. Note that the value may be null if the element is null or the type does not match.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_256","title":"Operands:","text":"Operand Description list
list index
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#results_236","title":"Results:","text":"Result Description result
ref"},{"location":"reference/mlir-dialects/VM/#vmlistreserve-vmlistreserveop","title":"vm.list.reserve
(VM::ListReserveOp)","text":"Reserves capacity for list growth
Syntax:
operation ::= `vm.list.reserve` operands attr-dict `:` `(` type($list) `,` type($minimum_capacity) `)`\n
Reserves storage for at least minimum_capacity elements. If the list already has at least the specified capacity the operation is ignored.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Allocate on ::mlir::SideEffects::DefaultResource, MemoryEffects::Read on ::mlir::SideEffects::DefaultResource, MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_257","title":"Operands:","text":"Operand Description list
list minimum_capacity
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistresize-vmlistresizeop","title":"vm.list.resize
(VM::ListResizeOp)","text":"Resizes the list to a new count in elements
Syntax:
operation ::= `vm.list.resize` operands attr-dict `:` `(` type($list) `,` type($new_size) `)`\n
Resizes the list to contain new_size elements. This will either truncate the list if the existing size is greater than new_size or extend the list with the default list value of 0 if storing primitives and null if refs.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_258","title":"Operands:","text":"Operand Description list
list new_size
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetf32-vmlistsetf32op","title":"vm.list.set.f32
(VM::ListSetF32Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.f32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Traits: VM_ExtF32
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_259","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
32-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistsetf64-vmlistsetf64op","title":"vm.list.set.f64
(VM::ListSetF64Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.f64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Traits: VM_ExtF64
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_260","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
64-bit float"},{"location":"reference/mlir-dialects/VM/#vmlistseti32-vmlistseti32op","title":"vm.list.set.i32
(VM::ListSetI32Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.i32` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_261","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistseti64-vmlistseti64op","title":"vm.list.set.i64
(VM::ListSetI64Op)","text":"Primitive type element mutator
Syntax:
operation ::= `vm.list.set.i64` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new value.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_262","title":"Operands:","text":"Operand Description list
list<8/16/32/64-bit integer or 16/32/64-bit float> index
32-bit signless integer value
64-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmlistsetref-vmlistsetrefop","title":"vm.list.set.ref
(VM::ListSetRefOp)","text":"Ref type element mutator
Syntax:
operation ::= `vm.list.set.ref` operands attr-dict `:` `(` type($list) `,` type($index) `,` type($value) `)`\n
Sets the element at the given index to the new ref value (possibly null).
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_263","title":"Operands:","text":"Operand Description list
list index
32-bit signless integer value
ref"},{"location":"reference/mlir-dialects/VM/#vmlistsize-vmlistsizeop","title":"vm.list.size
(VM::ListSizeOp)","text":"The size of the list in elements
Syntax:
operation ::= `vm.list.size` operands attr-dict `:` `(` type($list) `)` `->` type($result)\n
Returns the current size of the list in elements.
Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface), VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}
"},{"location":"reference/mlir-dialects/VM/#operands_264","title":"Operands:","text":"Operand Description list
list"},{"location":"reference/mlir-dialects/VM/#results_237","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#ref-comparison-ops","title":"Ref comparison ops","text":"Comparison ops for vm.ref
.
"},{"location":"reference/mlir-dialects/VM/#vmcmpeqref-vmcmpeqrefop","title":"vm.cmp.eq.ref
(VM::CmpEQRefOp)","text":"Ref equality comparison operation
Syntax:
operation ::= `vm.cmp.eq.ref` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_265","title":"Operands:","text":"Operand Description lhs
ref rhs
ref"},{"location":"reference/mlir-dialects/VM/#results_238","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpneref-vmcmpnerefop","title":"vm.cmp.ne.ref
(VM::CmpNERefOp)","text":"Ref inequality comparison operation
Syntax:
operation ::= `vm.cmp.ne.ref` operands attr-dict `:` type($lhs)\n
Compares two operands with the specified predicate.
Traits: AlwaysSpeculatableImplTrait, Commutative
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_266","title":"Operands:","text":"Operand Description lhs
ref rhs
ref"},{"location":"reference/mlir-dialects/VM/#results_239","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#vmcmpnzref-vmcmpnzrefop","title":"vm.cmp.nz.ref
(VM::CmpNZRefOp)","text":"Ref non-zero comparison operation
Syntax:
operation ::= `vm.cmp.nz.ref` $operand attr-dict `:` type($operand)\n
Compares the given ref operand for a non-zero/null value.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface), OpAsmOpInterface, VMSerializableOp, VM_OpInterface
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VM/#operands_267","title":"Operands:","text":"Operand Description operand
ref"},{"location":"reference/mlir-dialects/VM/#results_240","title":"Results:","text":"Result Description result
32-bit signless integer"},{"location":"reference/mlir-dialects/VM/#structural-ops","title":"Structural ops","text":""},{"location":"reference/mlir-dialects/VM/#vmexport-vmexportop","title":"vm.export
(VM::ExportOp)","text":"Exports a function from the module
Specifies an exported function with an externally-visible alias. Multiple exports can reference the same internal functions.
Interfaces: SymbolUserOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_35","title":"Attributes:","text":"AttributeMLIR TypeDescription function_ref
::mlir::FlatSymbolRefAttrflat symbol reference attribute export_name
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrordinal value"},{"location":"reference/mlir-dialects/VM/#vmfunc-vmfuncop","title":"vm.func
(VM::FuncOp)","text":"Function defined with VM control flow ops
Represents a function containing VM ops and those of compatible dialects. All flow control is performed by VM ops.
Traits: HasParent, IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_36","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type ordinal
::mlir::IntegerAttrordinal value noinline
::mlir::UnitAttrunit attribute arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmimport-vmimportop","title":"vm.import
(VM::ImportOp)","text":"Imports a function from an external module
Specifies a function that should be imported from either the runtime or an external VM module.
Required imports can be declared with a minimum version of the module that contains the import. The maximum declared minimum version of all required imports from the module will become the required minimum version at runtime.
Optional imports not present at runtime will be invalid to call and whether they were resolved can be queried with vm.import.resolved
.
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_37","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_name
::mlir::StringAttrstring attribute function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes sym_visibility
::mlir::StringAttrstring attribute ordinal
::mlir::IntegerAttrordinal value is_optional
::mlir::UnitAttrunit attribute minimum_version
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vminitializer-vminitializerop","title":"vm.initializer
(VM::InitializerOp)","text":"Global initialization function
A function that is called in definition order upon module initialization. Must not load any globals that are defined or initialized after it in the module.
Traits: HasParent, IsolatedFromAbove
Interfaces: CallableOpInterface, FunctionOpInterface, Symbol, Util_InitializerOpInterface, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_38","title":"Attributes:","text":"AttributeMLIR TypeDescription function_type
::mlir::TypeAttrtype attribute of function type arg_attrs
::mlir::ArrayAttrArray of dictionary attributes res_attrs
::mlir::ArrayAttrArray of dictionary attributes"},{"location":"reference/mlir-dialects/VM/#vmmodule-vmmoduleop","title":"vm.module
(VM::ModuleOp)","text":"Module containing VM functions and variables
Syntax:
operation ::= `vm.module` custom<SymbolVisibility>($sym_visibility)\n $sym_name\n attr-dict-with-keyword\n regions\n
Top-level container for VM functions.
Traits: IsolatedFromAbove, SingleBlock, SingleBlockImplicitTerminator, SymbolTable
Interfaces: Symbol, VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attributes_39","title":"Attributes:","text":"AttributeMLIR TypeDescription sym_visibility
::mlir::StringAttrstring attribute sym_name
::mlir::StringAttrstring attribute ordinal_counts
::mlir::iree_compiler::IREE::VM::OrdinalCountsAttr version
::mlir::IntegerAttr32-bit signless integer attribute"},{"location":"reference/mlir-dialects/VM/#vmmodule_terminator-vmmoduleterminatorop","title":"vm.module_terminator
(VM::ModuleTerminatorOp)","text":"Terminator pseudo-op for the module op
Syntax:
operation ::= `vm.module_terminator` attr-dict\n
Traits: HasParent, Terminator
Interfaces: VM_OpInterface
"},{"location":"reference/mlir-dialects/VM/#attribute-definition","title":"Attribute definition","text":""},{"location":"reference/mlir-dialects/VM/#ordinalcountsattr","title":"OrdinalCountsAttr","text":"Syntax:
#vm.ordinal_counts<\n int32_t, # import_funcs\n int32_t, # export_funcs\n int32_t, # internal_funcs\n int32_t, # global_bytes\n int32_t, # global_refs\n int32_t, # rodatas\n int32_t # rwdatas\n>\n
"},{"location":"reference/mlir-dialects/VM/#parameters","title":"Parameters:","text":"Parameter C++ type Description import_funcs int32_t
export_funcs int32_t
internal_funcs int32_t
global_bytes int32_t
global_refs int32_t
rodatas int32_t
rwdatas int32_t
"},{"location":"reference/mlir-dialects/VMVX/","title":"VMVX","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvx-dialect","title":"'vmvx' Dialect","text":"Vector extensions to the IREE VM.
This is a reference dialect representing a simple IREE VM-based linear algebra module that is used as a library at runtime. The ops in this dialect map (roughly) 1:1 with the exported functions in the runtime module.
See vmvx.imports.mlir
for the full list of exported functions.
- 'vmvx' Dialect
- Operation definition
- ABI ops
- vmvx.binary (VMVX::BinaryOp)
- vmvx.copy (VMVX::CopyOp)
- vmvx.fill2d (VMVX::Fill2DOp)
- vmvx.unary (VMVX::UnaryOp)
- Utility ops
- vmvx.get_buffer_descriptor (VMVX::GetBufferDescriptorOp)
- vmvx.get_raw_interface_binding_buffer (VMVX::GetRawInterfaceBindingBufferOp)
"},{"location":"reference/mlir-dialects/VMVX/#operation-definition","title":"Operation definition","text":""},{"location":"reference/mlir-dialects/VMVX/#abi-ops","title":"ABI ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxbinary-vmvxbinaryop","title":"vmvx.binary
(VMVX::BinaryOp)","text":"Performs a strided elementwise operation on two same-rank buffers
Syntax:
operation ::= `vmvx.binary` `op` `` `(` $opcode `:` $element_type `)`\n `lhs` `` `(` $lhs_buffer `offset` $lhs_offset `strides` `[` $lhs_strides `]` `:` type($lhs_buffer) `)`\n `rhs` `` `(` $rhs_buffer `offset` $rhs_offset `strides` `[` $rhs_strides `]` `:` type($rhs_buffer) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n `sizes` `` `(` $sizes `)`\n attr-dict\n
Performs the operation in-place as if:
OUT = OP(LHS, RHS)\n
Where OP
is a concrete operation name as defined in ukernel/elementwise.h
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/VMVX/#attributes","title":"Attributes:","text":"AttributeMLIR TypeDescription opcode
::mlir::StringAttrstring attribute element_type
::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands","title":"Operands:","text":"Operand Description lhs_buffer
a reference counted byte buffer lhs_offset
index lhs_strides
variadic of index rhs_buffer
a reference counted byte buffer rhs_offset
index rhs_strides
variadic of index out_buffer
a reference counted byte buffer out_offset
index out_strides
variadic of index sizes
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxcopy-vmvxcopyop","title":"vmvx.copy
(VMVX::CopyOp)","text":"Copy from one buffer to another
Syntax:
operation ::= `vmvx.copy` `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n `sizes` `` `(` $sizes `)`\n `:` $element_type\n attr-dict\n
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/VMVX/#attributes_1","title":"Attributes:","text":"AttributeMLIR TypeDescription element_type
::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_1","title":"Operands:","text":"Operand Description in_buffer
a reference counted byte buffer in_offset
index in_strides
variadic of index out_buffer
a reference counted byte buffer out_offset
index out_strides
variadic of index sizes
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxfill2d-vmvxfill2dop","title":"vmvx.fill2d
(VMVX::Fill2DOp)","text":"Fill a tile with a scalar
Syntax:
operation ::= `vmvx.fill2d` `scalar` `` `(` $scalar `:` type($scalar) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `row_stride` $out_row_stride `:` type($out_buffer) `)`\n `sizes` `` `(` $m `,` $n `)`\n attr-dict\n
Fills a tile with dimensions [m, n] with a scalar.
"},{"location":"reference/mlir-dialects/VMVX/#operands_2","title":"Operands:","text":"Operand Description scalar
8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float out_buffer
a reference counted byte buffer out_offset
index out_row_stride
index m
index n
index"},{"location":"reference/mlir-dialects/VMVX/#vmvxunary-vmvxunaryop","title":"vmvx.unary
(VMVX::UnaryOp)","text":"Performs a strided elementwise unary operation
Syntax:
operation ::= `vmvx.unary` `op` `` `(` $opcode `:` $element_type `)`\n `in` `` `(` $in_buffer `offset` $in_offset `strides` `[` $in_strides `]` `:` type($in_buffer) `)`\n `out` `` `(` $out_buffer `offset` $out_offset `strides` `[` $out_strides `]` `:` type($out_buffer) `)`\n `sizes` `` `(` $sizes `)`\n attr-dict\n
Performs the operation in-place as if:
OUT = OP(IN)\n
Where OP
is a concrete operation name as defined in ukernel/elementwise.h
Traits: SameVariadicOperandSize
"},{"location":"reference/mlir-dialects/VMVX/#attributes_2","title":"Attributes:","text":"AttributeMLIR TypeDescription opcode
::mlir::StringAttrstring attribute element_type
::mlir::TypeAttrtype attribute of 8-bit signless integer or 16-bit signless integer or 32-bit signless integer or 64-bit signless integer or 32-bit float or 64-bit float"},{"location":"reference/mlir-dialects/VMVX/#operands_3","title":"Operands:","text":"Operand Description in_buffer
a reference counted byte buffer in_offset
index in_strides
variadic of index out_buffer
a reference counted byte buffer out_offset
index out_strides
variadic of index sizes
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#utility-ops","title":"Utility ops","text":""},{"location":"reference/mlir-dialects/VMVX/#vmvxget_buffer_descriptor-vmvxgetbufferdescriptorop","title":"vmvx.get_buffer_descriptor
(VMVX::GetBufferDescriptorOp)","text":"Late binds a base buffer/offset/strides
Syntax:
operation ::= `vmvx.get_buffer_descriptor` $source `:` type($source) `->` type(results) attr-dict\n
Queries a base buffer, offset and strides. This op is late bound to its source (alloca, binding, etc), allowing additional layers of transformations to be added as lowering progresses (or for buffers to be combined).
This op has canonicalization rules which will bubble it up through the view stack. A final reconciliation pass is used explicitly to bind it to concrete sources.
Traits: AlwaysSpeculatableImplTrait, SameVariadicResultSize
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VMVX/#operands_4","title":"Operands:","text":"Operand Description source
memref of any type values"},{"location":"reference/mlir-dialects/VMVX/#results","title":"Results:","text":"Result Description base_buffer
a reference counted byte buffer offset
index sizes
variadic of index strides
variadic of index"},{"location":"reference/mlir-dialects/VMVX/#vmvxget_raw_interface_binding_buffer-vmvxgetrawinterfacebindingbufferop","title":"vmvx.get_raw_interface_binding_buffer
(VMVX::GetRawInterfaceBindingBufferOp)","text":"Gets the raw buffer associated with a binding
Syntax:
operation ::= `vmvx.get_raw_interface_binding_buffer` `set` `(` $set `)` `binding` `(` $binding `)` attr-dict\n
Normally, a slice of a binding buffer is returned via hal.interface.binding.subspan. However, the normal VMVX lowering flow for this presumes that the result is a memref, and upon final conversion, it will offset the memref automatically to make it consistent.
This op is used in situations where earlier in a lowering, we have fully resolved the binding to a buffer and would just like the raw backing buffer as passed to the interface.
Traits: AlwaysSpeculatableImplTrait
Interfaces: ConditionallySpeculatable, NoMemoryEffect (MemoryEffectOpInterface)
Effects: MemoryEffects::Effect{}
"},{"location":"reference/mlir-dialects/VMVX/#attributes_3","title":"Attributes:","text":"AttributeMLIR TypeDescription set
::mlir::IntegerAttrindex attribute binding
::mlir::IntegerAttrindex attribute"},{"location":"reference/mlir-dialects/VMVX/#results_1","title":"Results:","text":"Result Description buffer
a reference counted byte buffer"},{"location":"community/tags/","title":"Tags","text":"Website pages sorted by tag:
"},{"location":"community/tags/#android","title":"Android","text":" - Android cross-compilation
- Android LLDB debugging
"},{"location":"community/tags/#cpu","title":"CPU","text":" - RISC-V cross-compilation
- Matrix Multiplication with MMT4D
- Profiling CPUs
- CPU - Bare-Metal
- CPU
"},{"location":"community/tags/#cuda","title":"CUDA","text":" - CUDA backend
- CUDA backend design
- GPU - CUDA
"},{"location":"community/tags/#gpu","title":"GPU","text":" - CUDA backend
- Vulkan environment setup
- CUDA backend design
- Profiling GPUs using Vulkan
- GPU - CUDA
- GPU - Metal
- GPU - ROCm
- GPU - Vulkan
"},{"location":"community/tags/#jax","title":"JAX","text":" - JAX
- Extensions
- Glossary
"},{"location":"community/tags/#pytorch","title":"PyTorch","text":" - PyTorch
- Extensions
- Glossary
"},{"location":"community/tags/#python","title":"Python","text":" - JAX
- PyTorch
- TensorFlow
- TensorFlow Lite
- Python
"},{"location":"community/tags/#tensorflow","title":"TensorFlow","text":" - TFLite support via TOSA
- TensorFlow
- TensorFlow Lite
- Extensions
- Glossary
"},{"location":"community/tags/#vulkan","title":"Vulkan","text":" - Vulkan environment setup
- Profiling GPUs using Vulkan
- GPU - Vulkan
"},{"location":"community/tags/#web","title":"Web","text":" - Building with Emscripten
"},{"location":"community/tags/#ios","title":"iOS","text":" - iOS cross-compilation
- GPU - Metal
"},{"location":"community/blog/archive/2021/","title":"2021","text":""},{"location":"community/blog/category/platforms/","title":"Platforms","text":""},{"location":"community/blog/category/performance/","title":"Performance","text":""},{"location":"community/blog/category/frontends/","title":"Frontends","text":""}]}
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 60fa95f62992..475629ba871d 100755
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ