Skip to content

Commit

Permalink
[NPU] Update adapter and backend to use new level zero ext table (#2…
Browse files Browse the repository at this point in the history
…7095)

### Details:
- *use ze_graph_dditable_ext_t to replace local
ze_graph_dditable_ext_last_t*
- *use ze_graph_ext_version_t as template parameter instead of
ze_graph_dditable_ext_1_x*
 - *update commit of level-zero-ext to use one ddi table*

With the current driver, or any driver released later, the plugin will use
ze_graph_dditable_ext_t and ZE_extension_graph. The version is
ZE_GRAPH_EXT_VERSION_CURRENT from the current level-zero-ext commit, which is
1.8 now.

With an old driver, the plugin will use ze_graph_dditable_ext_t to reinterpret
the old ddi table and will use the largest version supported by the driver.
Both the backend and the compiler work based on ze_graph_ext_version_t.
For example, if the largest ext version supported by the driver is 1.7,
the plugin will use 1.7.

### Tickets:
 - *155313*
  • Loading branch information
XinWangIntel authored Oct 28, 2024
1 parent dd6ed6c commit 8371289
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 108 deletions.
8 changes: 2 additions & 6 deletions src/plugins/intel_npu/src/backend/include/zero_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@

#include "intel_npu/config/runtime.hpp"

/**
* @brief Last version of Table of Graph Extension functions used within plugin
*/
using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_8_t;
/**
* @brief Last version of the Command Queue functions used within plugin
*/
Expand All @@ -34,7 +30,7 @@ using ze_graph_profiling_dditable_ext_last_t = ze_graph_profiling_dditable_ext_t
*/
struct ze_graph_dditable_ext_decorator final {
private:
ze_graph_dditable_ext_last_t* const _impl;
ze_graph_dditable_ext_t* const _impl;
const uint32_t _driverExtVersion;

ze_graph_dditable_ext_decorator(const ze_graph_dditable_ext_decorator&) = delete;
Expand All @@ -53,7 +49,7 @@ struct ze_graph_dditable_ext_decorator final {
}

public:
ze_graph_dditable_ext_decorator(ze_graph_dditable_ext_last_t* impl, uint32_t driverExtVersion)
ze_graph_dditable_ext_decorator(ze_graph_dditable_ext_t* impl, uint32_t driverExtVersion)
: _impl(impl),
_driverExtVersion(driverExtVersion),
// version 1.0
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/src/zero_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ uint32_t ZeroEngineBackend::getGraphExtVersion() const {
}

bool ZeroEngineBackend::isBatchingSupported() const {
return _instance->isExtensionSupported(std::string(ZE_GRAPH_EXT_NAME_1_6), ZE_MAKE_VERSION(1, 6));
return _instance->isExtensionSupported("ZE_extension_graph_1_6", ZE_MAKE_VERSION(1, 6));
}

bool ZeroEngineBackend::isCommandQueueExtSupported() const {
Expand Down
42 changes: 40 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include "zero_init.hpp"

#include <regex>

#include "intel_npu/common/itt.hpp"
#include "intel_npu/utils/zero/zero_api.hpp"
#include "intel_npu/utils/zero/zero_utils.hpp"
Expand Down Expand Up @@ -114,14 +116,50 @@ ZeroInitStructsHolder::ZeroInitStructsHolder() : log("NPUZeroInitStructsHolder",
// Query our graph extension version
std::string graph_ext_name;
uint32_t graph_ext_version = 0;
uint32_t target_graph_ext_version = ZE_GRAPH_EXT_VERSION_CURRENT;

#if defined(NPU_PLUGIN_DEVELOPER_BUILD)
const char* extVersion = std::getenv("NPU_ZE_GRAPH_EXT_VERSION");
if (extVersion) {
std::string extVersionString(extVersion);
std::regex extVersionRegex(R"(^(\d+)\.(\d+)$)");
std::smatch match;

if (std::regex_match(extVersionString, match, extVersionRegex)) {
int major = std::stoi(match[1].str());
int minor = std::stoi(match[2].str());
log.debug("Try to find graph ext version: %d.%d instead of %d.%d.",
major,
minor,
ZE_MAJOR_VERSION(target_graph_ext_version),
ZE_MINOR_VERSION(target_graph_ext_version));
target_graph_ext_version = ZE_MAKE_VERSION(major, minor);
}
}
#endif

log.debug("Try to find graph ext version: %d.%d",
ZE_MAJOR_VERSION(target_graph_ext_version),
ZE_MINOR_VERSION(target_graph_ext_version));
std::tie(graph_ext_version, graph_ext_name) =
queryDriverExtensionVersion(ZE_GRAPH_EXT_NAME, ZE_GRAPH_EXT_VERSION_CURRENT, extProps, count);
queryDriverExtensionVersion(ZE_GRAPH_EXT_NAME, target_graph_ext_version, extProps, count);

if (graph_ext_name.empty()) {
OPENVINO_THROW("queryGraphExtensionVersion: Failed to find Graph extension in NPU Driver");
}

// Use version that plugin can support as identifier to control workflow
if (graph_ext_version > target_graph_ext_version) {
log.warning("Graph extension version from driver is %d.%d. "
"Larger than plugin max graph ext version %d.%d. "
"Force to use plugin ext version with the new table to control flow!",
ZE_MAJOR_VERSION(graph_ext_version),
ZE_MINOR_VERSION(graph_ext_version),
ZE_MAJOR_VERSION(target_graph_ext_version),
ZE_MINOR_VERSION(target_graph_ext_version));
graph_ext_version = target_graph_ext_version;
}

const uint16_t supported_driver_ext_major_version = 1;
const uint16_t driver_ext_major_version = ZE_MAJOR_VERSION(graph_ext_version);
if (supported_driver_ext_major_version != driver_ext_major_version) {
Expand Down Expand Up @@ -166,7 +204,7 @@ ZeroInitStructsHolder::ZeroInitStructsHolder() : log("NPUZeroInitStructsHolder",
command_queue_ext_version);

// Load our graph extension
ze_graph_dditable_ext_last_t* graph_ddi_table_ext = nullptr;
ze_graph_dditable_ext_t* graph_ddi_table_ext = nullptr;
THROW_ON_FAIL_FOR_LEVELZERO("zeDriverGetExtensionFunctionAddress",
zeDriverGetExtensionFunctionAddress(driver_handle,
graph_ext_name.c_str(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,37 +18,34 @@ namespace driverCompilerAdapter {

using SerializedIR = std::pair<size_t, std::shared_ptr<uint8_t>>;

#define NotSupportQuery(T) (std::is_same<T, ze_graph_dditable_ext_1_2_t>::value)
#define NotSupportQuery(T) (T == ZE_GRAPH_EXT_VERSION_1_2)

// ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy,
// pfnQueryNetworkGetSupportedLayers)
#define SupportAPIGraphQueryNetworkV1(T) \
(std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || std::is_same<T, ze_graph_dditable_ext_1_4_t>::value)
#define SupportAPIGraphQueryNetworkV1(T) (T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4)

// ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory)
#define SupportAPIGraphQueryNetworkV2(T) ((!NotSupportQuery(T) && !SupportAPIGraphQueryNetworkV1(T)))

// For ext version >= 1.5, pfnCreate2 api is available
#define NotSupportGraph2(T) \
(std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value)
#define NotSupportGraph2(T) \
(T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4)

// A bug inside the driver makes the "pfnGraphGetArgumentMetadata" call not safe for use prior to
// "ze_graph_dditable_ext_1_6_t".
// See: E#117498
#define NotSupportArgumentMetadata(T) \
(std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || std::is_same<T, ze_graph_dditable_ext_1_5_t>::value)
#define NotSupportArgumentMetadata(T) \
(T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4 || \
T == ZE_GRAPH_EXT_VERSION_1_5)

#define UseCopyForNativeBinary(T) \
(std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || std::is_same<T, ze_graph_dditable_ext_1_5_t>::value || \
std::is_same<T, ze_graph_dditable_ext_1_6_t>::value)
#define UseCopyForNativeBinary(T) \
(T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4 || \
T == ZE_GRAPH_EXT_VERSION_1_5 || T == ZE_GRAPH_EXT_VERSION_1_6)

/**
* Adapter to use CiD through ZeroAPI
*/
template <typename TableExtension>
template <ze_graph_ext_version_t TableExtension>
class LevelZeroCompilerInDriver final : public ICompiler {
public:
LevelZeroCompilerInDriver(ze_driver_handle_t driverHandle,
Expand Down Expand Up @@ -79,7 +76,7 @@ class LevelZeroCompilerInDriver final : public ICompiler {
OPENVINO_THROW("Profiling post-processing is not implemented.");
}

template <typename T = TableExtension, std::enable_if_t<!NotSupportQuery(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension, std::enable_if_t<!NotSupportQuery(T), bool> = true>
std::unordered_set<std::string> getQueryResultFromSupportedLayers(
ze_result_t result,
ze_graph_query_network_handle_t& hGraphQueryNetwork) const;
Expand Down Expand Up @@ -111,47 +108,54 @@ class LevelZeroCompilerInDriver final : public ICompiler {
ze_graph_compiler_version_info_t compilerVersion) const;
std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t& compilerVersion) const;

template <typename T = TableExtension, typename std::enable_if_t<NotSupportArgumentMetadata(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<NotSupportArgumentMetadata(T), bool> = true>
void getMetadata(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const;

template <typename T = TableExtension, typename std::enable_if_t<!NotSupportArgumentMetadata(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<!NotSupportArgumentMetadata(T), bool> = true>
void getMetadata(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const;

template <typename T = TableExtension, typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& blob,
const uint8_t*& blobPtr,
size_t& blobSize) const;

template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& /* unusedBlob */,
const uint8_t*& blobPtr,
size_t& blobSize) const;

template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr<const ov::Model>& model,
const Config& config,
ze_device_graph_properties_t deviceGraphProperties,
const ze_device_handle_t& _deviceHandle,
ze_graph_query_network_handle_t& hGraphQueryNetwork) const;

// ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory)
template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
std::unordered_set<std::string> queryImpl(const std::shared_ptr<const ov::Model>& model,
const Config& config) const;

template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV1(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<SupportAPIGraphQueryNetworkV1(T), bool> = true>
ze_result_t seriazlideIRModelAndQueryNetworkCreateV1(const std::shared_ptr<const ov::Model>& model,
const Config& config,
ze_device_graph_properties_t deviceGraphProperties,
Expand All @@ -160,23 +164,24 @@ class LevelZeroCompilerInDriver final : public ICompiler {

// ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy,
// pfnQueryNetworkGetSupportedLayers)
template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV1(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension,
typename std::enable_if_t<SupportAPIGraphQueryNetworkV1(T), bool> = true>
std::unordered_set<std::string> queryImpl(const std::shared_ptr<const ov::Model>& model,
const Config& config) const;

// For ext version < 1.3
template <typename T = TableExtension, typename std::enable_if_t<NotSupportQuery(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension, typename std::enable_if_t<NotSupportQuery(T), bool> = true>
std::unordered_set<std::string> queryImpl(const std::shared_ptr<const ov::Model>& model,
const Config& config) const;

template <typename T = TableExtension, typename std::enable_if_t<NotSupportGraph2(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension, typename std::enable_if_t<NotSupportGraph2(T), bool> = true>
ze_result_t createGraph(const ze_graph_format_t& format,
const SerializedIR& serializedIR,
const std::string& buildFlags,
const uint32_t& flags,
ze_graph_handle_t* graph) const;

template <typename T = TableExtension, typename std::enable_if_t<!NotSupportGraph2(T), bool> = true>
template <ze_graph_ext_version_t T = TableExtension, typename std::enable_if_t<!NotSupportGraph2(T), bool> = true>
ze_result_t createGraph(const ze_graph_format_t& format,
const SerializedIR& serializedIR,
const std::string& buildFlags,
Expand Down
Loading

0 comments on commit 8371289

Please sign in to comment.