diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
new file mode 100644
index 0000000000..6bdb07e719
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/graph/ops/ExecuteNode.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/Utils.h>
+
+namespace at {
+namespace native {
+namespace vulkan {
+
+void ExecuteNode::encode(ComputeGraph* graph) {
+  api::Context* const context = graph->context();
+  api::PipelineBarrier pipeline_barrier{};
+
+  std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();
+
+  api::DescriptorSet descriptor_set =
+      context->get_descriptor_set(shader_, local_workgroup_size_);
+
+  uint32_t idx = 0;
+  idx = bind_values_to_descriptor_set(
+      graph,
+      outputs_,
+      pipeline_barrier,
+      api::MemoryAccessType::WRITE,
+      descriptor_set,
+      idx);
+  idx = bind_values_to_descriptor_set(
+      graph,
+      inputs_,
+      pipeline_barrier,
+      api::MemoryAccessType::READ,
+      descriptor_set,
+      idx);
+  descriptor_set.bind(idx, params_.buffer());
+
+  context->register_shader_dispatch(
+      descriptor_set, pipeline_barrier, shader_, global_workgroup_size_);
+}
+
+} // namespace vulkan
+} // namespace native
+} // namespace at
diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
index 2b8fb04cbd..1b726e73d4 100644
--- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h
+++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
@@ -33,20 +33,37 @@ class ExecuteNode {
 
  public:
   ExecuteNode(ValueRef input, ValueRef output)
-      : inputs_{input}, outputs_{output} {}
+      : outputs_{output}, inputs_{input} {}
+
   ExecuteNode(
+      const api::ShaderInfo& shader,
+      const api::utils::uvec3& global_workgroup_size,
+      const api::utils::uvec3& local_workgroup_size,
+      const std::vector<ValueRef>& outputs,
       const std::vector<ValueRef>& inputs,
-      const std::vector<ValueRef>& outputs)
-      : inputs_(inputs), outputs_(outputs) {}
+      api::UniformParamsBuffer&& params)
+      : shader_(shader),
+        global_workgroup_size_(global_workgroup_size),
+        local_workgroup_size_(local_workgroup_size),
+        outputs_(outputs),
+        inputs_(inputs),
+        params_(std::move(params)) {}
 
   virtual ~ExecuteNode() = default;
 
  protected:
-  std::vector<ValueRef> inputs_;
+  // TODO: Consider making members const after we remove StagingNode.
+  api::ShaderInfo shader_;
+  api::utils::uvec3 global_workgroup_size_;
+  api::utils::uvec3 local_workgroup_size_;
   std::vector<ValueRef> outputs_;
+  std::vector<ValueRef> inputs_;
+  // TODO(T180906086): pass multiple buffers and index with ValueRef.
+  // TODO(T180906457): allow re-computing param buffers.
+  api::UniformParamsBuffer params_;
 
  public:
-  virtual void encode(ComputeGraph* graph) const = 0;
+  virtual void encode(ComputeGraph* graph);
 };
 
 } // namespace vulkan
diff --git a/backends/vulkan/runtime/graph/ops/Utils.cpp b/backends/vulkan/runtime/graph/ops/Utils.cpp
new file mode 100644
index 0000000000..579eac54e3
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/Utils.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/graph/ops/Utils.h>
+
+namespace at {
+namespace native {
+namespace vulkan {
+
+api::utils::ivec4 get_size_as_ivec4(const vTensor& t) {
+  return api::utils::make_ivec4(
+      {dim_at<Dim4D::Width>(t),
+       dim_at<Dim4D::Height>(t),
+       dim_at<Dim4D::Channel>(t),
+       dim_at<Dim4D::Batch>(t)});
+}
+
+void bind_tensor_to_descriptor_set(
+    vTensor& tensor,
+    api::PipelineBarrier& pipeline_barrier,
+    const api::MemoryAccessType accessType,
+    api::DescriptorSet& descriptor_set,
+    const uint32_t idx) {
+  if (tensor.buffer()) {
+    api::VulkanBuffer& buffer = tensor.buffer(
+        pipeline_barrier, api::PipelineStage::COMPUTE, accessType);
+    descriptor_set.bind(idx, buffer);
+  } else {
+    api::VulkanImage& image =
+        tensor.image(pipeline_barrier, api::PipelineStage::COMPUTE, accessType);
+    descriptor_set.bind(idx, image);
+  }
+}
+
+uint32_t bind_values_to_descriptor_set(
+    ComputeGraph* graph,
+    const std::vector<ValueRef>& args,
+    api::PipelineBarrier& pipeline_barrier,
+    const api::MemoryAccessType accessType,
+    api::DescriptorSet& descriptor_set,
+    const uint32_t base_idx) {
+  uint32_t idx = base_idx;
+  for (auto& arg : args) {
+    Value& val = graph->get_val(arg);
+    if (val.isTensor()) {
+      vTensor& tensor = val.toTensor();
+      bind_tensor_to_descriptor_set(
+          tensor, pipeline_barrier, accessType, descriptor_set, idx++);
+    } else {
+      VK_THROW("Unsupported type: ", val.type());
+    }
+  }
+  return idx;
+}
+
+} // namespace vulkan
+} // namespace native
+} // namespace at
diff --git a/backends/vulkan/runtime/graph/ops/Utils.h b/backends/vulkan/runtime/graph/ops/Utils.h
index f962c17bcc..9cf214ca87 100644
--- a/backends/vulkan/runtime/graph/ops/Utils.h
+++ b/backends/vulkan/runtime/graph/ops/Utils.h
@@ -10,7 +10,7 @@
 
 #ifdef USE_VULKAN_API
 
-#include <ATen/native/vulkan/impl/Arithmetic.h>
+#include <ATen/native/vulkan/impl/Common.h>
 
 #include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
 
@@ -21,6 +21,23 @@ namespace vulkan {
 #define DECLARE_OP_FN(function) \
   ValueRef function(ComputeGraph& graph, const std::vector<ValueRef>& args);
 
+api::utils::ivec4 get_size_as_ivec4(const vTensor& t);
+
+void bind_tensor_to_descriptor_set(
+    vTensor& tensor,
+    api::PipelineBarrier& pipeline_barrier,
+    const api::MemoryAccessType accessType,
+    api::DescriptorSet& descriptor_set,
+    const uint32_t idx);
+
+uint32_t bind_values_to_descriptor_set(
+    ComputeGraph* graph,
+    const std::vector<ValueRef>& args,
+    api::PipelineBarrier& pipeline_barrier,
+    const api::MemoryAccessType accessType,
+    api::DescriptorSet& descriptor_set,
+    const uint32_t base_idx);
+
 } // namespace vulkan
 } // namespace native
 } // namespace at
diff --git a/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp b/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp
index 8d316e9f48..ce43005384 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp
@@ -8,44 +8,39 @@
 
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/Arithmetic.h>
 
-#include <ATen/native/vulkan/impl/Common.h>
-
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/Staging.h>
 
 namespace at {
 namespace native {
 namespace vulkan {
 
-#define DEFINE_ARITHMETIC_FN(function, op_type)                               \
+#define DEFINE_ARITHMETIC_FN(function, shader)                                \
   ValueRef function(ComputeGraph& graph, const std::vector<ValueRef>& args) { \
     return add_arithmetic_node(                                               \
-        graph,                                                                \
-        args[0],                                                              \
-        args[1],                                                              \
-        args[2],                                                              \
-        arithmetic::OpType::op_type,                                          \
-        args[3]);                                                             \
+        graph, args[0], args[1], args[2], VK_KERNEL(shader), args[3]);        \
   }
 
-DEFINE_ARITHMETIC_FN(add, ADD);
-DEFINE_ARITHMETIC_FN(sub, SUB);
-DEFINE_ARITHMETIC_FN(mul, MUL);
-DEFINE_ARITHMETIC_FN(div, DIV);
-DEFINE_ARITHMETIC_FN(floor_div, FLOOR_DIV);
-DEFINE_ARITHMETIC_FN(pow, POW);
+DEFINE_ARITHMETIC_FN(add, add);
+DEFINE_ARITHMETIC_FN(sub, sub);
+DEFINE_ARITHMETIC_FN(mul, mul);
+DEFINE_ARITHMETIC_FN(div, div);
+DEFINE_ARITHMETIC_FN(floor_div, floor_divide);
+DEFINE_ARITHMETIC_FN(pow, pow);
 
+// TODO(T180908843): Bypass this entrypoint function by creating `ValueRef out`
+// ahead of time.
 ValueRef add_arithmetic_node(
     ComputeGraph& graph,
     const ValueRef in1,
     const ValueRef in2,
     const float alpha,
-    const arithmetic::OpType optype,
+    const api::ShaderInfo& shader,
     const int64_t shared_object_idx) {
   std::vector<int64_t> in1_sizes = graph.get_val_sizes(in1);
   api::ScalarType in1_dtype = graph.get_val_dtype(in1);
 
   ValueRef out = graph.add_tensor(in1_sizes, in1_dtype, shared_object_idx);
-  add_arithmetic_node(graph, in1, in2, out, alpha, optype);
+  add_arithmetic_node(graph, in1, in2, out, alpha, shader);
   return out;
 }
 
@@ -67,12 +62,27 @@ void add_arithmetic_node(
     const ValueRef in2,
     const ValueRef out,
     const float alpha,
-    const arithmetic::OpType optype) {
+    const api::ShaderInfo& shader) {
   ValueRef arg1 = prepack_if_tensor_ref(graph, in1);
   ValueRef arg2 = prepack_if_tensor_ref(graph, in2);
 
-  graph.execute_nodes().emplace_back(
-      new ArithmeticNode(arg1, arg2, out, alpha, optype));
+  vTensor& t_in1 = graph.get_val(arg1).toTensor();
+  vTensor& t_in2 = graph.get_val(arg2).toTensor();
+  vTensor& t_out = graph.get_val(out).toTensor();
+
+  api::utils::uvec3 global_size = t_out.extents();
+  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
+
+  ArithmeticParams block{
+      get_size_as_ivec4(t_out),
+      get_size_as_ivec4(t_in1),
+      get_size_as_ivec4(t_in2),
+      1.0,
+  };
+  api::UniformParamsBuffer params(graph.context(), block);
+
+  graph.execute_nodes().emplace_back(new ExecuteNode(
+      shader, global_size, local_size, {out}, {arg1, arg2}, std::move(params)));
 }
 
 ArithmeticPrepack::ArithmeticPrepack(const ValueRef tref, const ValueRef packed)
@@ -92,23 +102,6 @@ void ArithmeticPrepack::encode(ComputeGraph* graph) const {
   encode_copy_to_vtensor(graph->context(), staging, packed);
 }
 
-ArithmeticNode::ArithmeticNode(
-    const ValueRef in1,
-    const ValueRef in2,
-    const ValueRef out,
-    const float alpha,
-    const arithmetic::OpType optype)
-    : ExecuteNode({in1, in2}, {out}), alpha_(alpha), optype_(optype) {}
-
-void ArithmeticNode::encode(ComputeGraph* graph) const {
-  vTensor& in1 = graph->get_val(inputs_[0]).toTensor();
-  vTensor& in2 = graph->get_val(inputs_[1]).toTensor();
-  vTensor& out = graph->get_val(outputs_[0]).toTensor();
-
-  api::ShaderInfo kernel = arithmetic::get_shader(optype_);
-  arithmetic::record_op(graph->context(), kernel, in1, in2, out, alpha_);
-}
-
 } // namespace vulkan
 } // namespace native
 } // namespace at
diff --git a/backends/vulkan/runtime/graph/ops/impl/Arithmetic.h b/backends/vulkan/runtime/graph/ops/impl/Arithmetic.h
index 767517043b..82e2aa2cdf 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Arithmetic.h
+++ b/backends/vulkan/runtime/graph/ops/impl/Arithmetic.h
@@ -32,7 +32,7 @@ ValueRef add_arithmetic_node(
     const ValueRef in1,
     const ValueRef in2,
     const float alpha,
-    const arithmetic::OpType optype,
+    const api::ShaderInfo& shader,
     const int64_t shared_object_idx = -1);
 
 void add_arithmetic_node(
@@ -41,29 +41,20 @@ void add_arithmetic_node(
     const ValueRef in2,
     const ValueRef out,
     const float alpha,
-    const arithmetic::OpType optype);
+    const api::ShaderInfo& shader);
 
-class ArithmeticPrepack : public virtual PrepackNode {
- public:
-  explicit ArithmeticPrepack(const ValueRef tref, const ValueRef packed);
-
-  void encode(ComputeGraph* graph) const override;
+struct ArithmeticParams final {
+  api::utils::ivec4 outputSizes;
+  api::utils::ivec4 input1Sizes;
+  api::utils::ivec4 input2Sizes;
+  float alpha;
 };
 
-class ArithmeticNode : public virtual ExecuteNode {
+class ArithmeticPrepack : public virtual PrepackNode {
  public:
-  explicit ArithmeticNode(
-      const ValueRef in1,
-      const ValueRef in2,
-      const ValueRef out,
-      const float alpha,
-      const arithmetic::OpType optype);
+  explicit ArithmeticPrepack(const ValueRef tref, const ValueRef packed);
 
   void encode(ComputeGraph* graph) const override;
-
- private:
-  float alpha_;
-  arithmetic::OpType optype_;
 };
 
 } // namespace vulkan
diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
index 459d5edf1b..5b16780777 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
@@ -100,7 +100,7 @@ void encode_copy_from_vtensor(
 
 StagingNode::StagingNode(ValueRef from, ValueRef to) : ExecuteNode(from, to) {}
 
-void StagingNode::encode(ComputeGraph* graph) const {
+void StagingNode::encode(ComputeGraph* graph) {
   Value& in_val = graph->get_val(inputs_[0]);
   Value& out_val = graph->get_val(outputs_[0]);
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.h b/backends/vulkan/runtime/graph/ops/impl/Staging.h
index bb9671d4e9..be57a9817f 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Staging.h
+++ b/backends/vulkan/runtime/graph/ops/impl/Staging.h
@@ -88,7 +88,7 @@ class StagingNode : public virtual ExecuteNode {
  public:
   explicit StagingNode(ValueRef from, ValueRef to);
 
-  void encode(ComputeGraph* graph) const override;
+  void encode(ComputeGraph* graph) override;
 };
 
 } // namespace vulkan
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index b36e2352eb..0692d8c709 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -10,7 +10,7 @@
 
 #include <ATen/native/vulkan/api/api.h>
 
-#include <ATen/native/vulkan/impl/Arithmetic.h>
+#include <ATen/native/vulkan/impl/Common.h>
 #include <ATen/native/vulkan/impl/Packing.h>
 
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/Arithmetic.h>
@@ -431,8 +431,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph) {
 
   IOValueRef out = {};
 
-  out.value = add_arithmetic_node(
-      graph, a.value, b.value, 1.0, arithmetic::OpType::ADD);
+  out.value = add_arithmetic_node(graph, a.value, b.value, 1.0, VK_KERNEL(add));
 
   out.staging = graph.set_output_tensor(out.value);
 
@@ -478,9 +477,8 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) {
 
   IOValueRef a = graph.add_input_tensor(size_big, api::kFloat);
 
-  ValueRef c =
-      add_arithmetic_node(graph, a.value, w1, 1.0, arithmetic::OpType::ADD);
-  ValueRef e = add_arithmetic_node(graph, c, w2, 1.0, arithmetic::OpType::MUL);
+  ValueRef c = add_arithmetic_node(graph, a.value, w1, 1.0, VK_KERNEL(add));
+  ValueRef e = add_arithmetic_node(graph, c, w2, 1.0, VK_KERNEL(mul));
 
   IOValueRef out = {};
   out.value = e;
@@ -528,7 +526,8 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects) {
       api::kFloat,
       /*shared_object_idx = */ 4);
 
-  // Allocation count will be 2 (1 staging buffer for each input tensor)
+  // Allocation count will be 2:
+  // 1 staging buffer for each input tensor
   EXPECT_TRUE(get_vma_allocation_count() == 2);
 
   ValueRef c = add_arithmetic_node(
@@ -536,7 +535,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects) {
       a.value,
       b.value,
       1.0,
-      arithmetic::OpType::ADD,
+      VK_KERNEL(add),
       /*shared_object_idx = */ 6);
 
   IOValueRef d = graph.add_input_tensor(
@@ -544,29 +543,33 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects) {
       api::kFloat,
       /*shared_object_idx = */ 2);
 
-  // Allocation count will be 3 (1 staging buffer for each input tensor)
-  EXPECT_TRUE(get_vma_allocation_count() == 3);
+  // Allocation count will be 4, two are new:
+  // 1 uniform buffer for arithmetic shader params
+  // 1 staging buffer for the input tensor
+  EXPECT_TRUE(get_vma_allocation_count() == 4);
 
   ValueRef e = add_arithmetic_node(
       graph,
       c,
       d.value,
       1.0,
-      arithmetic::OpType::MUL,
+      VK_KERNEL(mul),
       /*shared_object_idx = */ 4);
 
   IOValueRef out = {};
   out.value = e;
   out.staging = graph.set_output_tensor(out.value);
 
-  // Allocation count will be 4 (1 staging buffer for each I/O tensor)
-  EXPECT_TRUE(get_vma_allocation_count() == 4);
+  // Allocation count will be 6, three are new:
+  // 1 uniform buffer for arithmetic shader params
+  // 1 staging buffer for the input tensor
+  EXPECT_TRUE(get_vma_allocation_count() == 6);
 
   graph.encode_execute();
 
   // Allocation count will be 13:
   // 4 staging buffers for each I/O tensor
-  // 6 uniform buffers to store args for each shader dispatch
+  // 6 uniform buffers to store params for each shader dispatch
   // 3 shared objects to back tensor memory
   EXPECT_TRUE(get_vma_allocation_count() == 13);