From 1c16288f5bb5fe28fe667cc447e385cfbc4db901 Mon Sep 17 00:00:00 2001
From: Stephen Jia
Date: Wed, 6 Mar 2024 16:44:52 -0800
Subject: [PATCH] Use lazy descriptor pool allocation (#2285)

Summary:

## Context

In Vulkan, memory for Descriptor Sets (which are used to bind data to shader arguments) must be pre-allocated. Previously, the convention was to allocate a large number of descriptor sets upon creation of a Vulkan Context. While this worked well in Lite Interpreter, where only a single global Vulkan context is used, it leads to over-allocating descriptor sets in the Vulkan Delegate, where every `ComputeGraph` has its own dedicated Context.

https://github.com/pytorch/pytorch/pull/121134 allows the Descriptor Set pool to be initialized in a deferred fashion. This means that a `ComputeGraph` can count the total number of descriptors needed across all the compute shaders that will be encoded, and then allocate a Descriptor Set Pool of the appropriate size.

## Implementation Overview

1. When constructing a `ComputeGraph`, make sure that the descriptor pool config contains 0 for the maximum number of sets. This ensures that no descriptor pool is initialized when constructing the graph's `api::Context` instance.
2. When building the graph, `ExecuteNode` and `PrepackNode` call `graph.update_descriptor_counts(shader)` upon construction, which allows the `ComputeGraph` to count the total number of descriptor sets needed.
3. There are separate descriptor count objects for prepack and execute, since they correspond to different command buffers.
4. Before encoding any command buffers, call `graph.prepare()`, which constructs a descriptor pool config from the descriptor counts.

A sketch of the resulting call sequence is included at the end of this summary.

## Notes

One interesting finding is that I had to apply a safety factor to the descriptor counts to prevent the pool from running out of memory. This was reproducible on both Linux and Android. A more robust design, e.g. as discussed [here](https://www.reddit.com/r/vulkan/comments/17v66fi/question_about_descriptor_pool_allocations/), may be to maintain separate descriptor pools for each layout type. We should revisit this refactor at a later time.
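Below is a minimal sketch of the new lifecycle from the delegate's and the unit tests' point of view. It is illustrative only: the include path and the omitted op-building calls are assumptions, and `lazy_pool_lifecycle()` is a hypothetical driver function rather than code from this diff.

```cpp
// Sketch of the lazy descriptor pool lifecycle (assumed include path).
#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

using namespace at::native::vulkan;

void lazy_pool_lifecycle() {
  // GraphConfig() now requests 0 for descriptorPoolMaxSets, so constructing
  // the graph's api::Context does not allocate a descriptor pool.
  GraphConfig config;
  ComputeGraph graph(config);

  // ... build the graph here (add tensors, op nodes, inputs/outputs). Each
  // ExecuteNode / PrepackNode constructor calls
  // graph.update_descriptor_counts(shader, /*execute = */ ...) so the graph
  // tallies the descriptors each command buffer will need.

  // Merge the prepack and execute tallies, apply descriptorPoolSafetyFactor,
  // and initialize the descriptor pool with the resulting sizes.
  graph.prepare();

  // Only after prepare() are command buffers encoded.
  graph.encode_prepack();
  graph.prepack();
  graph.encode_execute();
}
```

The key design point is that `prepare()` must run after graph building and before any `encode_*()` call; the delegate does this in `compileModel()` and the updated tests do the same before `encode_execute()`.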
Reviewed By: jorgep31415 Differential Revision: D54603935 --- backends/vulkan/runtime/VulkanBackend.cpp | 37 +---------- .../vulkan/runtime/graph/ComputeGraph.cpp | 65 +++++++++++++++++++ backends/vulkan/runtime/graph/ComputeGraph.h | 13 ++++ backends/vulkan/runtime/graph/GraphConfig.cpp | 56 ++++++++++++++++ backends/vulkan/runtime/graph/GraphConfig.h | 10 +++ .../vulkan/runtime/graph/ops/ExecuteNode.cpp | 15 +++++ .../vulkan/runtime/graph/ops/ExecuteNode.h | 8 +-- .../vulkan/runtime/graph/ops/PrepackNode.cpp | 17 +++++ .../vulkan/runtime/graph/ops/PrepackNode.h | 9 +-- .../runtime/graph/ops/impl/Arithmetic.cpp | 1 + .../vulkan/runtime/graph/ops/impl/Staging.cpp | 4 +- .../vulkan/test/vulkan_compute_api_test.cpp | 51 ++++----------- 12 files changed, 198 insertions(+), 88 deletions(-) create mode 100644 backends/vulkan/runtime/graph/GraphConfig.cpp diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index 1222ee38e5..b5d3441886 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -62,39 +62,6 @@ api::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) { } } -GraphConfig generate_config() { - const uint32_t submit_frequency = UINT32_MAX; - - const api::CommandPoolConfig cmd_config{ - 4u, // cmdPoolInitialSize - 2u, // cmdPoolBatchSize - }; - - const api::DescriptorPoolConfig descriptor_pool_config{ - 1024u, // descriptorPoolMaxSets - 1024u, // descriptorUniformBufferCount - 1024u, // descriptorStorageBufferCount - 1024u, // descriptorCombinedSamplerCount - 1024u, // descriptorStorageImageCount - 32u, // descriptorPileSizes - }; - - const api::QueryPoolConfig query_pool_config{}; - - const api::ContextConfig context_config{ - submit_frequency, // cmdSubmitFrequency - cmd_config, // cmdPoolConfig - descriptor_pool_config, // descriptorPoolConfig - query_pool_config, // queryPoolConfig - }; - - const GraphConfig graph_config{ - context_config, - }; - - return graph_config; -} - class GraphBuilder { ComputeGraph* compute_graph_; VkGraphPtr flatbuffer_; @@ -269,6 +236,8 @@ class VulkanBackend final : public PyTorchBackendInterface { builder.build_graph(); + compute_graph->prepare(); + compute_graph->encode_prepack(); compute_graph->prepack(); @@ -284,7 +253,7 @@ class VulkanBackend final : public PyTorchBackendInterface { ComputeGraph* compute_graph = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR( context.get_runtime_allocator(), ComputeGraph); - new (compute_graph) ComputeGraph(generate_config()); + new (compute_graph) ComputeGraph(GraphConfig()); Error err = compileModel(processed->data(), compute_graph); diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index 0900dfb9c1..6aa9171d9f 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -18,6 +18,8 @@ namespace vulkan { ComputeGraph::ComputeGraph(GraphConfig config) : config_{config}, + prepack_descriptor_counts_{}, + execute_descriptor_counts_{}, context_{new api::Context( api::runtime()->default_adapter_i(), config_.contextConfig)}, @@ -27,6 +29,19 @@ ComputeGraph::ComputeGraph(GraphConfig config) execute_nodes_{}, inputs_{}, outputs_{} { + // Ensure that descriptor counts are initialized to 0 + prepack_descriptor_counts_.descriptorPoolMaxSets = 0; + prepack_descriptor_counts_.descriptorUniformBufferCount = 0; + prepack_descriptor_counts_.descriptorStorageBufferCount = 0; + 
prepack_descriptor_counts_.descriptorCombinedSamplerCount = 0; + prepack_descriptor_counts_.descriptorStorageImageCount = 0; + + execute_descriptor_counts_.descriptorPoolMaxSets = 0; + execute_descriptor_counts_.descriptorUniformBufferCount = 0; + execute_descriptor_counts_.descriptorStorageBufferCount = 0; + execute_descriptor_counts_.descriptorCombinedSamplerCount = 0; + execute_descriptor_counts_.descriptorStorageImageCount = 0; + context_->set_cmd(/*reusable = */ true); } @@ -39,6 +54,33 @@ ComputeGraph::~ComputeGraph() { context_->flush(); } +void ComputeGraph::update_descriptor_counts( + const api::ShaderInfo& shader_info, + bool execute) { + api::DescriptorPoolConfig* config = + execute ? &execute_descriptor_counts_ : &prepack_descriptor_counts_; + + config->descriptorPoolMaxSets += 1; + for (const VkDescriptorType arg_type : shader_info.kernel_layout) { + switch (arg_type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + config->descriptorUniformBufferCount += 1; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + config->descriptorStorageBufferCount += 1; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + config->descriptorCombinedSamplerCount += 1; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + config->descriptorStorageImageCount += 1; + break; + default: + VK_THROW("Unsupported descriptor type!"); + } + } +} + ValueRef ComputeGraph::add_tensor( const std::vector& sizes, const api::ScalarType dtype, @@ -138,6 +180,29 @@ void ComputeGraph::copy_from_staging( copy_staging_to_ptr(staging, data, nbytes); } +void ComputeGraph::prepare() { +#define MERGE_FIELD(field) \ + static_cast(std::ceil( \ + std::max( \ + execute_descriptor_counts_.field, \ + prepack_descriptor_counts_.field) * \ + config_.descriptorPoolSafetyFactor)) + + api::DescriptorPoolConfig config{ + MERGE_FIELD(descriptorPoolMaxSets), + MERGE_FIELD(descriptorUniformBufferCount), + MERGE_FIELD(descriptorStorageBufferCount), + MERGE_FIELD(descriptorCombinedSamplerCount), + MERGE_FIELD(descriptorStorageImageCount), + 1u, + }; + + if (!context_->descriptor_pool()) { + context_->descriptor_pool().init(config); + } +#undef MERGE_FIELD +} + void ComputeGraph::encode_prepack() { for (std::unique_ptr& node : prepack_nodes_) { node->encode(this); diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index a45e449ae2..7917304f0c 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -60,6 +60,9 @@ class ComputeGraph final { private: GraphConfig config_; + api::DescriptorPoolConfig prepack_descriptor_counts_; + api::DescriptorPoolConfig execute_descriptor_counts_; + std::unique_ptr context_; std::vector shared_objects_; std::vector values_; @@ -87,6 +90,10 @@ class ComputeGraph final { return outputs_; } + void update_descriptor_counts( + const api::ShaderInfo& shader_info, + bool execute); + /* * Returns the value at a particular reference */ @@ -163,6 +170,12 @@ class ComputeGraph final { SharedObject& get_shared_object(const int64_t idx); + // + // Graph Preparation + // + + void prepare(); + // // Input/Output // diff --git a/backends/vulkan/runtime/graph/GraphConfig.cpp b/backends/vulkan/runtime/graph/GraphConfig.cpp new file mode 100644 index 0000000000..8cda518dae --- /dev/null +++ b/backends/vulkan/runtime/graph/GraphConfig.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace at { +namespace native { +namespace vulkan { + +GraphConfig::GraphConfig() { + // No automatic submissions + const uint32_t submit_frequency = UINT32_MAX; + + // Only one command buffer will be encoded at a time + const api::CommandPoolConfig cmd_config{ + 1u, // cmdPoolInitialSize + 1u, // cmdPoolBatchSize + }; + + // Use lazy descriptor pool initialization by default; the graph runtime will + // tally up the number of descriptor sets needed while building the graph and + // trigger descriptor pool initialization with exact sizes before encoding the + // command buffer. + const api::DescriptorPoolConfig descriptor_pool_config{ + 0u, // descriptorPoolMaxSets + 0u, // descriptorUniformBufferCount + 0u, // descriptorStorageBufferCount + 0u, // descriptorCombinedSamplerCount + 0u, // descriptorStorageImageCount + 0u, // descriptorPileSizes + }; + + const api::QueryPoolConfig query_pool_config{}; + + const api::ContextConfig context_config{ + submit_frequency, // cmdSubmitFrequency + cmd_config, // cmdPoolConfig + descriptor_pool_config, // descriptorPoolConfig + query_pool_config, // queryPoolConfig + }; + + contextConfig = context_config; + + // Empirically selected safety factor. If descriptor pools start running out + // of memory, increase this safety factor. + descriptorPoolSafetyFactor = 1.25; +} + +} // namespace vulkan +} // namespace native +} // namespace at diff --git a/backends/vulkan/runtime/graph/GraphConfig.h b/backends/vulkan/runtime/graph/GraphConfig.h index 0cb9bb6f53..e2c8d6bed0 100644 --- a/backends/vulkan/runtime/graph/GraphConfig.h +++ b/backends/vulkan/runtime/graph/GraphConfig.h @@ -18,6 +18,16 @@ namespace vulkan { struct GraphConfig final { api::ContextConfig contextConfig; + + // Creating a descriptor pool with exactly the number of descriptors tallied + // by iterating through the shader layouts of shaders used in the graph risks + // the descriptor pool running out of memory, therefore apply a safety factor + // to descriptor counts when creating the descriptor pool to mitigate this + // risk. 
+ float descriptorPoolSafetyFactor; + + // Generate a default graph config with pre-configured settings + explicit GraphConfig(); }; } // namespace vulkan diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp index 7c1f0fe807..c9c338bc17 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp @@ -16,6 +16,21 @@ namespace at { namespace native { namespace vulkan { +ExecuteNode::ExecuteNode( + ComputeGraph& graph, + const api::ShaderInfo& shader, + const api::utils::uvec3& global_workgroup_size, + const api::utils::uvec3& local_workgroup_size, + const std::vector& args, + api::UniformParamsBuffer&& params) + : shader_(shader), + global_workgroup_size_(global_workgroup_size), + local_workgroup_size_(local_workgroup_size), + args_(args), + params_(std::move(params)) { + graph.update_descriptor_counts(shader, /*execute = */ true); +} + void ExecuteNode::encode(ComputeGraph* graph) { api::Context* const context = graph->context(); api::PipelineBarrier pipeline_barrier{}; diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h index ddd50c1f67..f3c2bba9c0 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h @@ -50,16 +50,12 @@ class ExecuteNode final { public: ExecuteNode( + ComputeGraph& graph, const api::ShaderInfo& shader, const api::utils::uvec3& global_workgroup_size, const api::utils::uvec3& local_workgroup_size, const std::vector& args, - api::UniformParamsBuffer&& params) - : shader_(shader), - global_workgroup_size_(global_workgroup_size), - local_workgroup_size_(local_workgroup_size), - args_(args), - params_(std::move(params)) {} + api::UniformParamsBuffer&& params); ~ExecuteNode() = default; diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index d16c671ba4..69e6ffabd6 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -17,6 +17,23 @@ namespace at { namespace native { namespace vulkan { +PrepackNode::PrepackNode( + ComputeGraph& graph, + const api::ShaderInfo& shader, + const api::utils::uvec3& global_workgroup_size, + const api::utils::uvec3& local_workgroup_size, + const ValueRef tref, + const ValueRef packed, + api::UniformParamsBuffer&& params) + : shader_(shader), + global_workgroup_size_(global_workgroup_size), + local_workgroup_size_(local_workgroup_size), + tref_(tref), + packed_(packed), + params_(std::move(params)) { + graph.update_descriptor_counts(shader, /*execute = */ false); +} + void PrepackNode::encode(ComputeGraph* graph) { api::Context* const context = graph->context(); api::PipelineBarrier pipeline_barrier{}; diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h index b3a5fd0086..59071e9371 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.h +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.h @@ -33,18 +33,13 @@ class PrepackNode final { public: PrepackNode( + ComputeGraph& graph, const api::ShaderInfo& shader, const api::utils::uvec3& global_workgroup_size, const api::utils::uvec3& local_workgroup_size, const ValueRef tref, const ValueRef packed, - api::UniformParamsBuffer&& params) - : shader_(shader), - global_workgroup_size_(global_workgroup_size), - local_workgroup_size_(local_workgroup_size), - tref_(tref), - 
packed_(packed), - params_(std::move(params)) {} + api::UniformParamsBuffer&& params); ~PrepackNode() = default; diff --git a/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp b/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp index f5895c1544..108ff2b2dc 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Arithmetic.cpp @@ -72,6 +72,7 @@ void add_arithmetic_node( api::UniformParamsBuffer params(graph.context(), block); graph.execute_nodes().emplace_back(new ExecuteNode( + graph, shader, global_size, local_size, diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index 41104532d4..953a06426a 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -48,6 +48,7 @@ void add_staging_to_tensor_node( graph.context(), create_staging_params(t_out)); graph.execute_nodes().emplace_back(new ExecuteNode( + graph, shader, global_size, local_size, @@ -90,6 +91,7 @@ void add_tensor_to_staging_node( } graph.execute_nodes().emplace_back(new ExecuteNode( + graph, shader, global_size, local_size, @@ -112,7 +114,7 @@ ValueRef prepack(ComputeGraph& graph, const ValueRef vref) { api::UniformParamsBuffer params(graph.context(), sp); graph.prepack_nodes().emplace_back(new PrepackNode( - shader, global_size, local_size, vref, v, std::move(params))); + graph, shader, global_size, local_size, vref, v, std::move(params))); return v; } diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index c53444ff0b..5c1fc8f3c5 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -108,39 +108,6 @@ size_t get_vma_allocation_count() { return get_vma_stats().total.statistics.allocationCount; } -GraphConfig generate_graph_config() { - const uint32_t submit_frequency = UINT32_MAX; - - const api::CommandPoolConfig cmd_config{ - 4u, // cmdPoolInitialSize - 2u, // cmdPoolBatchSize - }; - - const api::DescriptorPoolConfig descriptor_pool_config{ - 1024u, // descriptorPoolMaxSets - 1024u, // descriptorUniformBufferCount - 1024u, // descriptorStorageBufferCount - 1024u, // descriptorCombinedSamplerCount - 1024u, // descriptorStorageImageCount - 32u, // descriptorPileSizes - }; - - const api::QueryPoolConfig query_pool_config{}; - - const api::ContextConfig context_config{ - submit_frequency, // cmdSubmitFrequency - cmd_config, // cmdPoolConfig - descriptor_pool_config, // descriptorPoolConfig - query_pool_config, // queryPoolConfig - }; - - const GraphConfig graph_config{ - context_config, - }; - - return graph_config; -} - // // Test Wrapper // @@ -428,7 +395,7 @@ TEST_F(VulkanComputeAPITest, use_non_bound_textures_fails) { graph.copy_from_staging(name.staging, data_##name.data(), data_##name.size()); TEST(VulkanComputeGraphTest, test_values_scalars) { - GraphConfig config = generate_graph_config(); + GraphConfig config; ComputeGraph graph(config); ValueRef idx; @@ -441,7 +408,7 @@ TEST(VulkanComputeGraphTest, test_values_scalars) { } TEST(VulkanComputeGraphTest, test_values_scalar_list_inplace_constructed) { - GraphConfig config = generate_graph_config(); + GraphConfig config; ComputeGraph graph(config); ValueRef idx = graph.add_scalar_list({1, 2, 3, 4}); @@ -453,7 +420,7 @@ TEST(VulkanComputeGraphTest, test_values_scalar_list_inplace_constructed) { } TEST(VulkanComputeGraphTest, 
test_values_scalar_list_outside_constructed) { - GraphConfig config = generate_graph_config(); + GraphConfig config; ComputeGraph graph(config); ValueRef idx; @@ -469,7 +436,7 @@ TEST(VulkanComputeGraphTest, test_values_scalar_list_outside_constructed) { } TEST(VulkanComputeGraphTest, test_values_string) { - GraphConfig config = generate_graph_config(); + GraphConfig config; ComputeGraph graph(config); ValueRef idx; @@ -482,7 +449,7 @@ TEST(VulkanComputeGraphTest, test_values_string) { } TEST(VulkanComputeGraphTest, test_simple_graph) { - GraphConfig config = generate_graph_config(); + GraphConfig config; ComputeGraph graph(config); std::vector size_big = {4, 4, 4}; @@ -502,6 +469,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph) { out.staging = graph.set_output_tensor(out.value); + graph.prepare(); graph.encode_execute(); // Run graph @@ -531,7 +499,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph) { ValueRef name = graph.add_tensorref(sizes, api::kFloat, data_##name.data()); TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { - GraphConfig config = generate_graph_config(); + GraphConfig config; ComputeGraph graph(config); std::vector size_big = {4, 4, 4}; @@ -554,6 +522,8 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { out.value = e; out.staging = graph.set_output_tensor(out.value); + graph.prepare(); + graph.encode_prepack(); graph.prepack(); @@ -579,7 +549,7 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { } TEST(VulkanComputeGraphTest, test_simple_shared_objects) { - GraphConfig config = generate_graph_config(); + GraphConfig config; ComputeGraph graph(config); std::vector size_big = {4, 4, 4}; @@ -637,6 +607,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects) { // 1 staging buffer for the input tensor EXPECT_TRUE(get_vma_allocation_count() == 10); + graph.prepare(); graph.encode_execute(); // Allocation count will be 13, three shared objects are allocated for total: