Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor and class split #4432

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions backends/vulkan/tools/gpuinfo/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"reg_count": {
"enabled": true,
"threshold": 3,
"compensate": 0.1
},
"buf_cacheline_size": {
"enabled": true,
"threshold": 10,
"compensate": 0.1
},
"buffer_bandwidth": {
"enabled": true,
"range": 134217728,
"nflush": 4,
"nunroll": 16,
"niter": 10
},
"ubo_bandwidth": {
"enabled": true,
"range": 134217728,
"nflush": 4,
"nunroll": 16,
"niter": 10
},
"shared_bandwidth": {
"enabled": true,
"nflush": 4,
"nunroll": 16,
"niter": 10
},
"warp_size": {
"enabled": true,
"threshold": 3,
"compensate": 0.1
},
"tex_bandwidth": {
"enabled": true,
"nflush": 4,
"nunroll": 16,
"niter": 10
},
"tex_cacheline_concurr": {
"enabled": true,
"threshold": 3,
"compensate": 0.1
}
}
59 changes: 59 additions & 0 deletions backends/vulkan/tools/gpuinfo/glsl/tex_bandwidth.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

#define PRECISION ${PRECISION}
#define VEC4_T ${texel_type(DTYPE)}

layout(std430) buffer;

${layout_declare_sampler(0, "r", "A", DTYPE)}
${layout_declare_buffer(1, "w", "B", DTYPE, "PRECISION", False)}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int niter = 1;
layout(constant_id = 4) const int nvec = 1;
layout(constant_id = 5) const int local_group_size = 1;

// Texture bandwidth probe: each invocation repeatedly fetches texels from
// sampler A along the axis selected by the DIM template parameter, folds every
// fetched texel into `sum`, and finally stores the result in B at the index of
// its local invocation ID.
void main() {
// The address mask works as a modulo because x % 2^n == x & (2^n - 1).
// This will help us limit address accessing to a specific set of unique
// addresses depending on the access size we want to measure.
// NOTE(review): the mask only acts as a modulo when nvec is a power of two -
// confirm that callers always specialize nvec that way.
const int addr_mask = nvec - 1;
vec4 sum = vec4(0);

// This is to distribute the accesses to unique addresses across the workgroups, once the
// size of the access exceeds the workgroup width.
const uint workgroup_width = local_group_size * niter * ${NUNROLL};
uint offset = (gl_WorkGroupID[0] * workgroup_width + gl_LocalInvocationID[0]) & addr_mask;

// `i` is declared outside the loop because it is read again after the loop.
int i = 0;
for (; i < niter; ++i){
VEC4_T in_texel;
$for j in range(int(NUNROLL)):
$if DIM == 0:
in_texel = texelFetch(A, ivec3(offset, 0, 0), 0);
$elif DIM == 1:
in_texel = texelFetch(A, ivec3(0, offset, 0), 0);
$elif DIM == 2:
in_texel = texelFetch(A, ivec3(0, 0, offset), 0);

sum *= in_texel;

// On each unroll, a new unique address will be accessed through the offset,
// limited by the address mask to a specific set of unique addresses
offset = (offset + local_group_size) & addr_mask;
}

// This is to ensure no compiler optimizations occur
// After the loop `i` is never negative, so `i >> 31` evaluates to 0; adding it
// leaves the value unchanged while making the store depend on the loop counter.
vec4 zero = vec4(i>>31);

B[gl_LocalInvocationID[0]] = sum + zero;
}
15 changes: 15 additions & 0 deletions backends/vulkan/tools/gpuinfo/glsl/tex_bandwidth.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

tex_bandwidth:
parameter_names_with_default_values:
DTYPE: float
NUNROLL: "16"
generate_variant_forall:
DIM:
- RANGE: [0, 2]
shader_variants:
- NAME: tex_bandwidth
39 changes: 39 additions & 0 deletions backends/vulkan/tools/gpuinfo/glsl/tex_cacheline_concurr.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

#define PRECISION ${PRECISION}
#define VEC4_T ${texel_type(DTYPE)}

layout(std430) buffer;

${layout_declare_sampler(0, "r", "in_tex", DTYPE)}
${layout_declare_buffer(1, "w", "out_buf", DTYPE, "PRECISION", False)}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int niter = 1;

// Texture cache-line / concurrency probe: each invocation fetches, niter times,
// the texel of in_tex whose coordinate along the axis selected by the DIM
// template parameter equals its global invocation ID (the other two
// coordinates are 0), and accumulates the fetched values into `sum`.
void main() {
vec4 sum = vec4(0);
// `i` is declared outside the loop because it is read again after the loop.
int i = 0;
for (; i < niter; ++i){
$if DIM == 0:
sum += texelFetch(in_tex, ivec3(gl_GlobalInvocationID[0], 0, 0), 0);
$elif DIM == 1:
sum += texelFetch(in_tex, ivec3(0, gl_GlobalInvocationID[0], 0), 0);
$elif DIM == 2:
sum += texelFetch(in_tex, ivec3(0, 0, gl_GlobalInvocationID[0]), 0);
}

// This is to ensure no compiler optimizations occur
// After the loop `i` is never negative, so `i >> 31` evaluates to 0; adding it
// leaves the value unchanged while making the store depend on the loop counter.
vec4 zero = vec4(i>>31);

// Every invocation writes to the same element, out_buf[0].
out_buf[0] = sum + zero;
}
14 changes: 14 additions & 0 deletions backends/vulkan/tools/gpuinfo/glsl/tex_cacheline_concurr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

tex_cacheline_concurr:
parameter_names_with_default_values:
DTYPE: float
generate_variant_forall:
DIM:
- RANGE: [0, 2]
shader_variants:
- NAME: tex_cacheline_concurr
114 changes: 114 additions & 0 deletions backends/vulkan/tools/gpuinfo/include/app.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/vulkan/runtime/api/api.h>
#include <folly/json.h>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include "utils.h"

namespace gpuinfo {

class App {
private:
folly::dynamic config_;

public:
size_t buf_cache_size;
uint32_t max_shared_mem_size;
uint32_t sm_count;
uint32_t nthread_logic;
uint32_t subgroup_size;
uint32_t max_tex_width;
uint32_t max_tex_height;
uint32_t max_tex_depth;

App() {
context()->initialize_querypool();

std::cout << context()->adapter_ptr()->stringize() << std::endl
<< std::endl;

auto cl_device = get_cl_device();

sm_count = cl_device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
nthread_logic = cl_device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
buf_cache_size = cl_device.getInfo<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE>();
max_shared_mem_size = cl_device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
max_tex_width = cl_device.getInfo<CL_DEVICE_IMAGE3D_MAX_WIDTH>();
max_tex_height = cl_device.getInfo<CL_DEVICE_IMAGE3D_MAX_HEIGHT>();
max_tex_depth = cl_device.getInfo<CL_DEVICE_IMAGE3D_MAX_DEPTH>();

VkPhysicalDeviceSubgroupProperties subgroup_props{};
VkPhysicalDeviceProperties2 props2{};

props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
props2.pNext = &subgroup_props;
subgroup_props.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
vkGetPhysicalDeviceProperties2(
context()->adapter_ptr()->physical_handle(), &props2);
subgroup_size = subgroup_props.subgroupSize;

std::cout << std::endl;
std::cout << "SM count," << sm_count << std::endl;
std::cout << "Logic Thread Count," << nthread_logic << std::endl;
std::cout << "Cache Size," << buf_cache_size << std::endl;
std::cout << "Shared Memory Size," << max_shared_mem_size << std::endl;
std::cout << "SubGroup Size," << subgroup_size << std::endl;
std::cout << "MaxTexWidth," << max_tex_width << std::endl;
std::cout << "MaxTexHeight," << max_tex_height << std::endl;
std::cout << "MaxTexDepth," << max_tex_depth << std::endl;
}

float get_config(const std::string& test, const std::string& key) const {
if (config_[test].empty()) {
throw std::runtime_error("Missing config for " + test);
}

if (!config_[test][key].isNumber()) {
throw std::runtime_error(
"Config for " + test + "." + key + " is not a number");
}

float value;
if (config_[test][key].isDouble()) {
value = config_[test][key].getDouble();
} else {
value = config_[test][key].getInt();
}

std::cout << "Read value for " << test << "." << key << " = " << value
<< std::endl;
return value;
}

bool enabled(const std::string& test) const {
if (config_.empty() || config_[test].empty() ||
!config_[test]["enabled"].isBool()) {
return true;
}
return config_[test]["enabled"].getBool();
}

void load_config(std::string file_path) {
std::ifstream file(file_path);
std::stringstream buffer;
buffer << file.rdbuf();
const std::string json_str = buffer.str();
if (json_str.empty()) {
throw std::runtime_error(
"Failed to read config file from " + file_path + ".");
}
config_ = folly::parseJson(json_str);
}
};
} // namespace gpuinfo
Loading
Loading