From ef323f7b88f5497ae4227aee17a230ad6d1416cb Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 9 Aug 2024 18:22:50 -0700 Subject: [PATCH 01/50] Add sycl-compress --- buildbot/configure.py | 4 +- sycl-compress/CMakeLists.txt | 56 +++++++++++++++++++ .../include/sycl-compress/sycl-compress.h | 1 + sycl-compress/src/CMakeLists.txt | 24 ++++++++ sycl-compress/src/sycl-compress.cpp | 14 +++++ 5 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 sycl-compress/CMakeLists.txt create mode 100644 sycl-compress/include/sycl-compress/sycl-compress.h create mode 100644 sycl-compress/src/CMakeLists.txt create mode 100644 sycl-compress/src/sycl-compress.cpp diff --git a/buildbot/configure.py b/buildbot/configure.py index fc89f8b7b00bf..5baca2731b46b 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -21,7 +21,7 @@ def do_configure(args): if not os.path.isdir(abs_obj_dir): os.makedirs(abs_obj_dir) - llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw" + llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw;sycl-compress" # libdevice build requires a working SYCL toolchain, which is not the case # with macOS target right now. 
@@ -44,6 +44,7 @@ def do_configure(args): spirv_dir = os.path.join(abs_src_dir, "llvm-spirv") xpti_dir = os.path.join(abs_src_dir, "xpti") xptifw_dir = os.path.join(abs_src_dir, "xptifw") + sycl_compress_dir = os.path.join(abs_src_dir, "sycl-compress") libdevice_dir = os.path.join(abs_src_dir, "libdevice") fusion_dir = os.path.join(abs_src_dir, "sycl-fusion") llvm_targets_to_build = args.host_target @@ -173,6 +174,7 @@ def do_configure(args): "-DLLVM_EXTERNAL_XPTI_SOURCE_DIR={}".format(xpti_dir), "-DXPTI_SOURCE_DIR={}".format(xpti_dir), "-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR={}".format(xptifw_dir), + "-DLLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR={}".format(sycl_compress_dir), "-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR={}".format(libdevice_dir), "-DLLVM_EXTERNAL_SYCL_FUSION_SOURCE_DIR={}".format(fusion_dir), "-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects), diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt new file mode 100644 index 0000000000000..671f55fb149c7 --- /dev/null +++ b/sycl-compress/CMakeLists.txt @@ -0,0 +1,56 @@ +cmake_minimum_required(VERSION 3.20.0) + +set(SYCL_COMPRESS_VERSION 0.0.7) +project (sycl-compress VERSION "${SYCL_COMPRESS_VERSION}" LANGUAGES CXX) + +# Setting the same version as SYCL +set(CMAKE_CXX_STANDARD 17) + +set(SYCL_COMPRESS_DIR ${CMAKE_CURRENT_LIST_DIR}) + +set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE) + +set(CMAKE_BINARY_DIR ${CMAKE_SOURCE_DIR}/lib/${CMAKE_BUILD_TYPE}) +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}) + +# Download and build zstd +include(ExternalProject) +ExternalProject_Add(zstd + URL https://github.com/facebook/zstd/releases/download/v1.5.6/zstd-1.5.6.tar.gz + URL_HASH SHA256=8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 + SOURCE_SUBDIR build/cmake + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/zstd/install -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF +) + +set(ZSTD_LIBRARY 
${CMAKE_CURRENT_BINARY_DIR}/zstd/install/lib/libzstd.a) +set(ZSTD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/zstd/install/include) + +include_directories(${ZSTD_INCLUDE_DIR}) + +add_subdirectory(src) +add_dependencies(sycl-compress zstd) +target_link_libraries(sycl-compress ${ZSTD_LIBRARY}) + +if (LLVM_BINARY_DIR) + file(GLOB_RECURSE SYCL_COMPRESS_HEADERS_LIST CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress/*") + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}" "${LLVM_BINARY_DIR}" + SYCL_COMPRESS_HEADERS_OUT_LIST "${SYCL_COMPRESS_HEADERS_LIST}") + add_custom_target(sycl-compress-headers + DEPENDS ${SYCL_COMPRESS_HEADERS_OUT_LIST}) + + add_custom_command( + OUTPUT ${SYCL_COMPRESS_HEADERS_OUT_LIST} + DEPENDS ${SYCL_COMPRESS_HEADERS_LIST} + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress + ${LLVM_BINARY_DIR}/include/sycl-compress + COMMENT "Copying sycl-compress headers ..." + ) + add_dependencies(sycl-compress sycl-compress-headers zstd) +endif() + +include(GNUInstallDirs) +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT sycl-compress +) \ No newline at end of file diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h new file mode 100644 index 0000000000000..d2a5425e9334f --- /dev/null +++ b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -0,0 +1 @@ +int foo(int a); \ No newline at end of file diff --git a/sycl-compress/src/CMakeLists.txt b/sycl-compress/src/CMakeLists.txt new file mode 100644 index 0000000000000..c49ec98722eb6 --- /dev/null +++ b/sycl-compress/src/CMakeLists.txt @@ -0,0 +1,24 @@ +include(GNUInstallDirs) + +macro(add_syclcompress_lib target_name) + add_library(${target_name} STATIC ${ARGN}) + target_compile_definitions(${target_name} PRIVATE -DLIB_SYCL_COMPRESS_STATIC_LIBRARY) + target_include_directories(${target_name} PRIVATE 
${SYCL_COMPRESS_DIR}/include ${ZSTD_INCLUDE_DIR}) + + # if (MSVC) + # target_compile_options(${target_name} PRIVATE /EHsc) + # endif() + + # Set the location of the library installation + install(TARGETS ${target_name} + RUNTIME DESTINATION bin COMPONENT sycl-compress + LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress + ) +endmacro() + +set(SOURCES + sycl-compress.cpp +) + +add_syclcompress_lib(sycl-compress ${SOURCES}) \ No newline at end of file diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp new file mode 100644 index 0000000000000..6b2e33088d90d --- /dev/null +++ b/sycl-compress/src/sycl-compress.cpp @@ -0,0 +1,14 @@ +#include +#include + +__attribute__((visibility("default"))) int compressBlob(void *src, size_t srcSize, + void *dst, int level) { + void* dstBuffer = malloc(srcSize); + size_t dstSize = ZSTD_compress(src, srcSize, dstBuffer, srcSize, level); + dst = dstBuffer; + return dstSize; +} + +int main() { + return 0; +} \ No newline at end of file From bdab2f05c3c5b4bdd34ccc922ae7785c36ec5977 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 17 Aug 2024 10:01:06 -0700 Subject: [PATCH 02/50] Fix decompression in RT --- .../tools/clang-linker-wrapper/CMakeLists.txt | 6 + .../ClangLinkerWrapper.cpp | 2 + .../clang-offload-wrapper/CMakeLists.txt | 8 + .../ClangOffloadWrapper.cpp | 507 ++++++++++-------- sycl-compress/CMakeLists.txt | 6 +- .../include/sycl-compress/sycl-compress.h | 5 +- sycl-compress/src/CMakeLists.txt | 2 +- sycl-compress/src/sycl-compress.cpp | 58 +- sycl/CMakeLists.txt | 2 + sycl/source/CMakeLists.txt | 4 + sycl/source/detail/compiler.hpp | 6 +- sycl/source/detail/device_binary_image.cpp | 68 ++- .../program_manager/program_manager.cpp | 23 +- 13 files changed, 447 insertions(+), 250 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt 
b/clang/tools/clang-linker-wrapper/CMakeLists.txt index 2692160fb429f..171e173eeaea3 100644 --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -34,6 +34,11 @@ add_clang_tool(clang-linker-wrapper ${tablegen_deps} ) +add_dependencies(clang-linker-wrapper sycl-compress) +include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) +message(CHECK_START "Finding sycl-compress") +set(SYCL_COMPRESS_LIB ${CMAKE_BINARY_DIR}/lib/libsycl-compress.a) + set(CLANG_LINKER_WRAPPER_LIB_DEPS clangBasic ) @@ -41,4 +46,5 @@ set(CLANG_LINKER_WRAPPER_LIB_DEPS target_link_libraries(clang-linker-wrapper PRIVATE ${CLANG_LINKER_WRAPPER_LIB_DEPS} + ${SYCL_COMPRESS_LIB} ) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index b0cfc68ee0f8d..a9bf1d727c7a0 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -64,6 +64,8 @@ #include #include +#include + #define COMPILE_OPTS "compile-opts" #define LINK_OPTS "link-opts" diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index 9cb5ec66c644e..aee5e9611a46e 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -12,9 +12,17 @@ set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS clangBasic ) +add_dependencies(clang-offload-wrapper sycl-compress) +include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) +message(CHECK_START "Finding sycl-compress") +set(SYCL_COMPRESS_LIB ${CMAKE_BINARY_DIR}/lib/libsycl-compress.a) +set(ZSTD_LIBRARY ${CMAKE_BINARY_DIR}/tools/sycl-compress/zstd/install/lib/libzstd.a) + add_dependencies(clang clang-offload-wrapper) clang_target_link_libraries(clang-offload-wrapper PRIVATE ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} + ${SYCL_COMPRESS_LIB} + ${ZSTD_LIBRARY} ) diff --git 
a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 5facbb4329abd..2d099afb27096 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -67,6 +67,9 @@ #include #include +#include +#include + #define OPENMP_OFFLOAD_IMAGE_VERSION "1.0" using namespace llvm; @@ -132,13 +135,24 @@ static cl::opt cl::value_desc("filename"), cl::cat(ClangOffloadWrapperCategory)); -static cl::opt Verbose("v", cl::desc("verbose output"), +static cl::opt Verbose("v", cl::desc("verbose output"), cl::init(true), cl::cat(ClangOffloadWrapperCategory)); static cl::list Inputs(cl::Positional, cl::OneOrMore, cl::desc(""), cl::cat(ClangOffloadWrapperCategory)); +// CLI options for device image compression. +static cl::opt + SYCLCompressDevImg("sycl-compress-dev-imgs", cl::init(true), cl::Optional, + cl::desc("Enable device image compression using ZSTD."), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt + SYCLCompressLevel("sycl-compress-level", cl::init(10), cl::Optional, + cl::desc("ZSTD Compression level. Default: 10"), + cl::cat(ClangOffloadWrapperCategory)); + // Binary image formats supported by this tool. The support basically means // mapping string representation given at the command line to a value from this // enum. No format checking is performed. @@ -146,8 +160,12 @@ enum BinaryImageFormat { none, // image kind is not determined native, // image kind is native // portable image kinds go next - spirv, // SPIR-V - llvmbc // LLVM bitcode + spirv, // SPIR-V + llvmbc, // LLVM bitcode + compressed_none, // compressed image with unknown format + compressed_native, // compressed native format + compressed_spirv, // compressed SPIR-V + compressed_llvmbc // compressed LLVM bitcode }; /// Sets offload kind. 
@@ -265,6 +283,14 @@ static StringRef formatToString(BinaryImageFormat Fmt) { return "llvmbc"; case BinaryImageFormat::native: return "native"; + case BinaryImageFormat::compressed_none: + return "compressed_none"; + case BinaryImageFormat::compressed_native: + return "compressed_native"; + case BinaryImageFormat::compressed_spirv: + return "compressed_spirv"; + case BinaryImageFormat::compressed_llvmbc: + return "compressed_llvmbc"; } llvm_unreachable("bad format"); @@ -416,9 +442,10 @@ class BinaryWrapper { // }; StructType *getEntryTy() { if (!EntryTy) - EntryTy = StructType::create("__tgt_offload_entry", PointerType::getUnqual(C), - PointerType::getUnqual(C), getSizeTTy(), - Type::getInt32Ty(C), Type::getInt32Ty(C)); + EntryTy = + StructType::create("__tgt_offload_entry", PointerType::getUnqual(C), + PointerType::getUnqual(C), getSizeTTy(), + Type::getInt32Ty(C), Type::getInt32Ty(C)); return EntryTy; } @@ -432,9 +459,9 @@ class BinaryWrapper { // }; StructType *getDeviceImageTy() { if (!ImageTy) - ImageTy = StructType::create("__tgt_device_image", PointerType::getUnqual(C), - PointerType::getUnqual(C), getEntryPtrTy(), - getEntryPtrTy()); + ImageTy = StructType::create( + "__tgt_device_image", PointerType::getUnqual(C), + PointerType::getUnqual(C), getEntryPtrTy(), getEntryPtrTy()); return ImageTy; } @@ -482,8 +509,8 @@ class BinaryWrapper { { PointerType::getUnqual(C), // Name PointerType::getUnqual(C), // ValAddr - Type::getInt32Ty(C), // Type - Type::getInt64Ty(C) // ValSize + Type::getInt32Ty(C), // Type + Type::getInt64Ty(C) // ValSize }, "_pi_device_binary_property_struct"); } @@ -505,8 +532,8 @@ class BinaryWrapper { SyclPropSetTy = StructType::create( { PointerType::getUnqual(C), // Name - getSyclPropPtrTy(), // PropertiesBegin - getSyclPropPtrTy() // PropertiesEnd + getSyclPropPtrTy(), // PropertiesBegin + getSyclPropPtrTy() // PropertiesEnd }, "_pi_device_binary_property_set_struct"); } @@ -556,9 +583,9 @@ class BinaryWrapper { if (!SyclImageTy) { 
SyclImageTy = StructType::create( { - Type::getInt16Ty(C), // Version - Type::getInt8Ty(C), // OffloadKind - Type::getInt8Ty(C), // Format + Type::getInt16Ty(C), // Version + Type::getInt8Ty(C), // OffloadKind + Type::getInt8Ty(C), // Format PointerType::getUnqual(C), // DeviceTargetSpec PointerType::getUnqual(C), // CompileOptions PointerType::getUnqual(C), // LinkOptions @@ -566,10 +593,10 @@ class BinaryWrapper { PointerType::getUnqual(C), // ManifestEnd PointerType::getUnqual(C), // ImageStart PointerType::getUnqual(C), // ImageEnd - getEntryPtrTy(), // EntriesBegin - getEntryPtrTy(), // EntriesEnd - getSyclPropSetPtrTy(), // PropertySetBegin - getSyclPropSetPtrTy() // PropertySetEnd + getEntryPtrTy(), // EntriesBegin + getEntryPtrTy(), // EntriesEnd + getSyclPropSetPtrTy(), // PropertySetBegin + getSyclPropSetPtrTy() // PropertySetEnd }, "__tgt_device_image"); } @@ -931,7 +958,7 @@ class BinaryWrapper { } public: - MemoryBuffer *addELFNotes(MemoryBuffer *Buf, StringRef OriginalFileName); + MemoryBuffer *addELFNotes(MemoryBuffer *Buf, StringRef OriginalFileName); private: /// Creates binary descriptor for the given device images. Binary descriptor @@ -1083,10 +1110,71 @@ class BinaryWrapper { return FBinOrErr.takeError(); Fbin = *FBinOrErr; } else { - Fbin = addDeviceImageToModule( - ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), - Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, - Img.Tgt); + + // Don't compress if the user explicitly specifies the binary image + // format. 
+ if (Kind != OffloadKind::SYCL || !SYCLCompressDevImg || + Img.Fmt != BinaryImageFormat::none) { + Fbin = addDeviceImageToModule( + ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), + Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, + Img.Tgt); + } else { + + if (Verbose) + errs() << " Compressing device image\n"; + + size_t dstSize; + void *dst = compressBlob(Bin->getBufferStart(), Bin->getBufferSize(), + dstSize, SYCLCompressLevel); + + if (!dstSize) { + if (Verbose) { + errs() << " Compression failed with error:" << (char *)dst + << "\n"; + errs() << " Falling back to uncompressed image\n"; + } + + Fbin = addDeviceImageToModule( + ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), + Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, + Img.Tgt); + } else { + if (Verbose) + errs() << " Compression succeeded. Original image size:" + << Bin->getBufferSize() + << " Compressed image size:" << dstSize << "\n"; + + errs() << "Image format:" << Img.Fmt << "\n"; + errs() << "Image target:" << Img.Tgt << "\n"; + Fbin = addDeviceImageToModule( + ArrayRef((const char *)dst, dstSize), + Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, + Img.Tgt); + + // Change SPRIV format -> compressed SPIRV format. 
+ BinaryImageFormat CompressedImgFmt; + switch (Img.Fmt) { + case BinaryImageFormat::none: + CompressedImgFmt = BinaryImageFormat::compressed_none; + break; + case BinaryImageFormat::native: + CompressedImgFmt = BinaryImageFormat::compressed_native; + break; + case BinaryImageFormat::spirv: + CompressedImgFmt = BinaryImageFormat::compressed_spirv; + break; + case BinaryImageFormat::llvmbc: + CompressedImgFmt = BinaryImageFormat::compressed_llvmbc; + break; + default: + return createStringError(errc::invalid_argument, + "unsupported image format"); + } + + Ffmt = ConstantInt::get(Type::getInt8Ty(C), CompressedImgFmt); + } + } } if (Kind == OffloadKind::SYCL) { @@ -1318,209 +1406,206 @@ class BinaryWrapper { } }; - // The whole function body is misaligned just to simplify - // conflict resolutions with llorg. - MemoryBuffer *BinaryWrapper::addELFNotes( - MemoryBuffer *Buf, - StringRef OriginalFileName) { - // Cannot add notes, if llvm-objcopy is not available. - // - // I did not find a clean way to add a new notes section into an existing - // ELF file. llvm-objcopy seems to recreate a new ELF from scratch, - // and we just try to use llvm-objcopy here. - if (ObjcopyPath.empty()) - return Buf; +// The whole function body is misaligned just to simplify +// conflict resolutions with llorg. +MemoryBuffer *BinaryWrapper::addELFNotes(MemoryBuffer *Buf, + StringRef OriginalFileName) { + // Cannot add notes, if llvm-objcopy is not available. + // + // I did not find a clean way to add a new notes section into an existing + // ELF file. llvm-objcopy seems to recreate a new ELF from scratch, + // and we just try to use llvm-objcopy here. + if (ObjcopyPath.empty()) + return Buf; - StringRef ToolNameRef(ToolName); + StringRef ToolNameRef(ToolName); - // Helpers to emit warnings. 
- auto warningOS = [ToolNameRef]() -> raw_ostream & { - return WithColor::warning(errs(), ToolNameRef); - }; - auto handleErrorAsWarning = [&warningOS](Error E) { - logAllUnhandledErrors(std::move(E), warningOS()); - }; + // Helpers to emit warnings. + auto warningOS = [ToolNameRef]() -> raw_ostream & { + return WithColor::warning(errs(), ToolNameRef); + }; + auto handleErrorAsWarning = [&warningOS](Error E) { + logAllUnhandledErrors(std::move(E), warningOS()); + }; - Expected> BinOrErr = - ObjectFile::createELFObjectFile(Buf->getMemBufferRef(), - /*InitContent=*/false); - if (Error E = BinOrErr.takeError()) { - consumeError(std::move(E)); - // This warning is questionable, but let it be here, - // assuming that most OpenMP offload models use ELF offload images. - warningOS() << OriginalFileName - << " is not an ELF image, so notes cannot be added to it.\n"; - return Buf; - } + Expected> BinOrErr = + ObjectFile::createELFObjectFile(Buf->getMemBufferRef(), + /*InitContent=*/false); + if (Error E = BinOrErr.takeError()) { + consumeError(std::move(E)); + // This warning is questionable, but let it be here, + // assuming that most OpenMP offload models use ELF offload images. + warningOS() << OriginalFileName + << " is not an ELF image, so notes cannot be added to it.\n"; + return Buf; + } - // If we fail to add the note section, we just pass through the original - // ELF image for wrapping. At some point we should enforce the note section - // and start emitting errors vs warnings. - endianness Endianness; - if (isa(BinOrErr->get()) || - isa(BinOrErr->get())) { - Endianness = endianness::little; - } else if (isa(BinOrErr->get()) || - isa(BinOrErr->get())) { - Endianness = endianness::big; - } else { - warningOS() << OriginalFileName - << " is an ELF image of unrecognized format.\n"; - return Buf; - } + // If we fail to add the note section, we just pass through the original + // ELF image for wrapping. 
At some point we should enforce the note section + // and start emitting errors vs warnings. + endianness Endianness; + if (isa(BinOrErr->get()) || + isa(BinOrErr->get())) { + Endianness = endianness::little; + } else if (isa(BinOrErr->get()) || + isa(BinOrErr->get())) { + Endianness = endianness::big; + } else { + warningOS() << OriginalFileName + << " is an ELF image of unrecognized format.\n"; + return Buf; + } - // Create temporary file for the data of a new SHT_NOTE section. - // We fill it in with data and then pass to llvm-objcopy invocation - // for reading. - Twine NotesFileModel = OriginalFileName + Twine(".elfnotes.%%%%%%%.tmp"); - Expected NotesTemp = - sys::fs::TempFile::create(NotesFileModel); - if (Error E = NotesTemp.takeError()) { - handleErrorAsWarning(createFileError(NotesFileModel, std::move(E))); - return Buf; - } - TempFiles.push_back(NotesTemp->TmpName); - - // Create temporary file for the updated ELF image. - // This is an empty file that we pass to llvm-objcopy invocation - // for writing. - Twine ELFFileModel = OriginalFileName + Twine(".elfwithnotes.%%%%%%%.tmp"); - Expected ELFTemp = - sys::fs::TempFile::create(ELFFileModel); - if (Error E = ELFTemp.takeError()) { - handleErrorAsWarning(createFileError(ELFFileModel, std::move(E))); - return Buf; - } - TempFiles.push_back(ELFTemp->TmpName); - - // Keep the new ELF image file to reserve the name for the future - // llvm-objcopy invocation. - std::string ELFTmpFileName = ELFTemp->TmpName; - if (Error E = ELFTemp->keep(ELFTmpFileName)) { - handleErrorAsWarning(createFileError(ELFTmpFileName, std::move(E))); - return Buf; - } + // Create temporary file for the data of a new SHT_NOTE section. + // We fill it in with data and then pass to llvm-objcopy invocation + // for reading. 
+ Twine NotesFileModel = OriginalFileName + Twine(".elfnotes.%%%%%%%.tmp"); + Expected NotesTemp = + sys::fs::TempFile::create(NotesFileModel); + if (Error E = NotesTemp.takeError()) { + handleErrorAsWarning(createFileError(NotesFileModel, std::move(E))); + return Buf; + } + TempFiles.push_back(NotesTemp->TmpName); + + // Create temporary file for the updated ELF image. + // This is an empty file that we pass to llvm-objcopy invocation + // for writing. + Twine ELFFileModel = OriginalFileName + Twine(".elfwithnotes.%%%%%%%.tmp"); + Expected ELFTemp = sys::fs::TempFile::create(ELFFileModel); + if (Error E = ELFTemp.takeError()) { + handleErrorAsWarning(createFileError(ELFFileModel, std::move(E))); + return Buf; + } + TempFiles.push_back(ELFTemp->TmpName); + + // Keep the new ELF image file to reserve the name for the future + // llvm-objcopy invocation. + std::string ELFTmpFileName = ELFTemp->TmpName; + if (Error E = ELFTemp->keep(ELFTmpFileName)) { + handleErrorAsWarning(createFileError(ELFTmpFileName, std::move(E))); + return Buf; + } - // Write notes to the *elfnotes*.tmp file. - raw_fd_ostream NotesOS(NotesTemp->FD, false); + // Write notes to the *elfnotes*.tmp file. + raw_fd_ostream NotesOS(NotesTemp->FD, false); - struct NoteTy { - // Note name is a null-terminated "LLVMOMPOFFLOAD". - std::string Name; - // Note type defined in llvm/include/llvm/BinaryFormat/ELF.h. - uint32_t Type = 0; - // Each note has type-specific associated data. - std::string Desc; + struct NoteTy { + // Note name is a null-terminated "LLVMOMPOFFLOAD". + std::string Name; + // Note type defined in llvm/include/llvm/BinaryFormat/ELF.h. + uint32_t Type = 0; + // Each note has type-specific associated data. 
+ std::string Desc; - NoteTy(std::string &&Name, uint32_t Type, std::string &&Desc) - : Name(std::move(Name)), Type(Type), Desc(std::move(Desc)) {} - }; + NoteTy(std::string &&Name, uint32_t Type, std::string &&Desc) + : Name(std::move(Name)), Type(Type), Desc(std::move(Desc)) {} + }; - // So far we emit just three notes. - SmallVector Notes; - // Version of the offload image identifying the structure of the ELF image. - // Version 1.0 does not have any specific requirements. - // We may come up with some structure that has to be honored by all - // offload implementations in future (e.g. to let libomptarget - // get some information from the offload image). - Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION, - OPENMP_OFFLOAD_IMAGE_VERSION); - // This is a producer identification string. We are LLVM! - Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER, - "LLVM"); - // This is a producer version. Use the same format that is used - // by clang to report the LLVM version. - Notes.emplace_back("LLVMOMPOFFLOAD", - ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION, - LLVM_VERSION_STRING + // So far we emit just three notes. + SmallVector Notes; + // Version of the offload image identifying the structure of the ELF image. + // Version 1.0 does not have any specific requirements. + // We may come up with some structure that has to be honored by all + // offload implementations in future (e.g. to let libomptarget + // get some information from the offload image). + Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION, + OPENMP_OFFLOAD_IMAGE_VERSION); + // This is a producer identification string. We are LLVM! + Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER, + "LLVM"); + // This is a producer version. Use the same format that is used + // by clang to report the LLVM version. 
+ Notes.emplace_back("LLVMOMPOFFLOAD", + ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION, + LLVM_VERSION_STRING #ifdef LLVM_REVISION - " " LLVM_REVISION + " " LLVM_REVISION #endif - ); - - // Return the amount of padding required for a blob of N bytes - // to be aligned to Alignment bytes. - auto getPadAmount = [](uint32_t N, uint32_t Alignment) -> uint32_t { - uint32_t Mod = (N % Alignment); - if (Mod == 0) - return 0; - return Alignment - Mod; - }; - auto emitPadding = [&getPadAmount](raw_ostream &OS, uint32_t Size) { - for (uint32_t I = 0; I < getPadAmount(Size, 4); ++I) - OS << '\0'; - }; + ); + + // Return the amount of padding required for a blob of N bytes + // to be aligned to Alignment bytes. + auto getPadAmount = [](uint32_t N, uint32_t Alignment) -> uint32_t { + uint32_t Mod = (N % Alignment); + if (Mod == 0) + return 0; + return Alignment - Mod; + }; + auto emitPadding = [&getPadAmount](raw_ostream &OS, uint32_t Size) { + for (uint32_t I = 0; I < getPadAmount(Size, 4); ++I) + OS << '\0'; + }; - // Put notes into the file. - for (auto &N : Notes) { - assert(!N.Name.empty() && "We should not create notes with empty names."); - // Name must be null-terminated. - if (N.Name.back() != '\0') - N.Name += '\0'; - uint32_t NameSz = N.Name.size(); - uint32_t DescSz = N.Desc.size(); - // A note starts with three 4-byte values: - // NameSz - // DescSz - // Type - // These three fields are endian-sensitive. - support::endian::write(NotesOS, NameSz, Endianness); - support::endian::write(NotesOS, DescSz, Endianness); - support::endian::write(NotesOS, N.Type, Endianness); - // Next, we have a null-terminated Name padded to a 4-byte boundary. - NotesOS << N.Name; - emitPadding(NotesOS, NameSz); - if (DescSz == 0) - continue; - // Finally, we have a descriptor, which is an arbitrary flow of bytes. - NotesOS << N.Desc; - emitPadding(NotesOS, DescSz); - } - NotesOS.flush(); + // Put notes into the file. 
+ for (auto &N : Notes) { + assert(!N.Name.empty() && "We should not create notes with empty names."); + // Name must be null-terminated. + if (N.Name.back() != '\0') + N.Name += '\0'; + uint32_t NameSz = N.Name.size(); + uint32_t DescSz = N.Desc.size(); + // A note starts with three 4-byte values: + // NameSz + // DescSz + // Type + // These three fields are endian-sensitive. + support::endian::write(NotesOS, NameSz, Endianness); + support::endian::write(NotesOS, DescSz, Endianness); + support::endian::write(NotesOS, N.Type, Endianness); + // Next, we have a null-terminated Name padded to a 4-byte boundary. + NotesOS << N.Name; + emitPadding(NotesOS, NameSz); + if (DescSz == 0) + continue; + // Finally, we have a descriptor, which is an arbitrary flow of bytes. + NotesOS << N.Desc; + emitPadding(NotesOS, DescSz); + } + NotesOS.flush(); - // Keep the notes file. - std::string NotesTmpFileName = NotesTemp->TmpName; - if (Error E = NotesTemp->keep(NotesTmpFileName)) { - handleErrorAsWarning(createFileError(NotesTmpFileName, std::move(E))); - return Buf; - } + // Keep the notes file. + std::string NotesTmpFileName = NotesTemp->TmpName; + if (Error E = NotesTemp->keep(NotesTmpFileName)) { + handleErrorAsWarning(createFileError(NotesTmpFileName, std::move(E))); + return Buf; + } - // Run llvm-objcopy like this: - // llvm-objcopy --add-section=.note.openmp= \ + // Run llvm-objcopy like this: + // llvm-objcopy --add-section=.note.openmp= \ // - // - // This will add a SHT_NOTE section on top of the original ELF. 
- std::vector Args; - Args.push_back(ObjcopyPath); - std::string Option("--add-section=.note.openmp=" + NotesTmpFileName); - Args.push_back(Option); - Args.push_back(OriginalFileName); - Args.push_back(ELFTmpFileName); - bool ExecutionFailed = false; - std::string ErrMsg; - (void)sys::ExecuteAndWait(ObjcopyPath, Args, - /*Env=*/std::nullopt, /*Redirects=*/{}, - /*SecondsToWait=*/0, - /*MemoryLimit=*/0, &ErrMsg, &ExecutionFailed); - - if (ExecutionFailed) { - warningOS() << ErrMsg << "\n"; - return Buf; - } - - // Substitute the original ELF with new one. - ErrorOr> BufOrErr = - MemoryBuffer::getFile(ELFTmpFileName); - if (!BufOrErr) { - handleErrorAsWarning( - createFileError(ELFTmpFileName, BufOrErr.getError())); - return Buf; - } + // + // This will add a SHT_NOTE section on top of the original ELF. + std::vector Args; + Args.push_back(ObjcopyPath); + std::string Option("--add-section=.note.openmp=" + NotesTmpFileName); + Args.push_back(Option); + Args.push_back(OriginalFileName); + Args.push_back(ELFTmpFileName); + bool ExecutionFailed = false; + std::string ErrMsg; + (void)sys::ExecuteAndWait(ObjcopyPath, Args, + /*Env=*/std::nullopt, /*Redirects=*/{}, + /*SecondsToWait=*/0, + /*MemoryLimit=*/0, &ErrMsg, &ExecutionFailed); + + if (ExecutionFailed) { + warningOS() << ErrMsg << "\n"; + return Buf; + } - AutoGcBufs.emplace_back(std::move(*BufOrErr)); - return AutoGcBufs.back().get(); + // Substitute the original ELF with new one. + ErrorOr> BufOrErr = + MemoryBuffer::getFile(ELFTmpFileName); + if (!BufOrErr) { + handleErrorAsWarning(createFileError(ELFTmpFileName, BufOrErr.getError())); + return Buf; } + AutoGcBufs.emplace_back(std::move(*BufOrErr)); + return AutoGcBufs.back().get(); +} + llvm::raw_ostream &operator<<(llvm::raw_ostream &Out, const BinaryWrapper::Image &Img) { Out << "\n{\n"; @@ -1587,7 +1672,7 @@ template class ListArgsSequencer { /// The only constructor. 
/// Sz - total number of options on the command line /// Args - the cl::list objects to sequence elements of - ListArgsSequencer(size_t Sz, Tys &... Args) + ListArgsSequencer(size_t Sz, Tys &...Args) : Prevs(Args.end()...), Iters(Args.begin()...) { // make OptListIDs big enough to hold IDs of all options coming from the // command line and initialize all IDs to default class -1 diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index 671f55fb149c7..6ebfcd4c39b12 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -29,7 +29,7 @@ include_directories(${ZSTD_INCLUDE_DIR}) add_subdirectory(src) add_dependencies(sycl-compress zstd) -target_link_libraries(sycl-compress ${ZSTD_LIBRARY}) +#target_link_libraries(sycl-compress ${ZSTD_LIBRARY}) if (LLVM_BINARY_DIR) file(GLOB_RECURSE SYCL_COMPRESS_HEADERS_LIST CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress/*") @@ -53,4 +53,6 @@ include(GNUInstallDirs) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT sycl-compress -) \ No newline at end of file +) + +set(SYCL_COMPRESS_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress) \ No newline at end of file diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h index d2a5425e9334f..c00cd265c9016 100644 --- a/sycl-compress/include/sycl-compress/sycl-compress.h +++ b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -1 +1,4 @@ -int foo(int a); \ No newline at end of file + + +char *compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level); +char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize); \ No newline at end of file diff --git a/sycl-compress/src/CMakeLists.txt b/sycl-compress/src/CMakeLists.txt index c49ec98722eb6..8a40caeb90195 100644 --- a/sycl-compress/src/CMakeLists.txt +++ b/sycl-compress/src/CMakeLists.txt @@ -2,8 +2,8 @@ 
include(GNUInstallDirs) macro(add_syclcompress_lib target_name) add_library(${target_name} STATIC ${ARGN}) - target_compile_definitions(${target_name} PRIVATE -DLIB_SYCL_COMPRESS_STATIC_LIBRARY) target_include_directories(${target_name} PRIVATE ${SYCL_COMPRESS_DIR}/include ${ZSTD_INCLUDE_DIR}) + target_link_libraries(${target_name} PRIVATE ${ZSTD_LIBRARY}) # if (MSVC) # target_compile_options(${target_name} PRIVATE /EHsc) diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp index 6b2e33088d90d..cb702a32243e0 100644 --- a/sycl-compress/src/sycl-compress.cpp +++ b/sycl-compress/src/sycl-compress.cpp @@ -1,14 +1,56 @@ +#include #include + +#include #include -__attribute__((visibility("default"))) int compressBlob(void *src, size_t srcSize, - void *dst, int level) { - void* dstBuffer = malloc(srcSize); - size_t dstSize = ZSTD_compress(src, srcSize, dstBuffer, srcSize, level); - dst = dstBuffer; - return dstSize; +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + +__attribute__((visibility("default"))) char * +compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { + auto dstBufferSize = ZSTD_compressBound(srcSize); + char *dstBuffer = static_cast(malloc(dstBufferSize)); + dstSize = ZSTD_compress(static_cast(dstBuffer), dstBufferSize, + static_cast(src), srcSize, level); + + // In case of compression error, return the error message and set dstSize to + // 0. + if (ZSTD_isError(dstSize)) { + std::cerr << "Error: " << ZSTD_getErrorName(dstSize) << "\n"; + strncpy(dstBuffer, ZSTD_getErrorName(dstSize), dstBufferSize); + dstSize = 0; + } + + return dstBuffer; } -int main() { - return 0; +__attribute__((visibility("default"))) char * +decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { + // Size of decompressed image can be larger than what we can allocate + // on heap. In that case, we need to use streaming decompression. 
+ // TODO: Throw if the decompression size is too large. + auto dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); + + if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || + dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { + std::cerr << "Error determining size of uncompressed data\n"; + std::cerr << "Error: " << ZSTD_getErrorName(dstBufferSize) << "\n"; + dstSize = 0; + return nullptr; + } + + char *dstBuffer = static_cast(malloc(dstBufferSize)); + dstSize = ZSTD_decompress(static_cast(dstBuffer), dstBufferSize, + static_cast(src), srcSize); + + // In case of decompression error, return the error message and set dstSize to + // 0. + if (ZSTD_isError(dstSize)) { + std::cerr << "Error: " << ZSTD_getErrorName(dstSize) << "\n"; + strncpy(dstBuffer, ZSTD_getErrorName(dstSize), dstBufferSize); + dstSize = 0; + } + + return dstBuffer; } \ No newline at end of file diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 1d5859cca5016..3ef481bd1b7f6 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -354,6 +354,7 @@ add_custom_target(sycl-compiler spirv-to-ir-wrapper sycl-post-link opencl-aot + sycl-compress ) add_custom_target( sycl-runtime-libraries @@ -450,6 +451,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS level-zero-sycl-dev ${XPTIFW_LIBS} ${SYCL_TOOLCHAIN_DEPS} + sycl-compress ) if (WIN32) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index d02dbb725637a..1ea0199e7b564 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -69,6 +69,10 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() + include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) + target_link_libraries(${LIB_NAME} PRIVATE ${CMAKE_BINARY_DIR}/lib/libsycl-compress.a) + target_link_libraries(${LIB_NAME} PRIVATE ${CMAKE_BINARY_DIR}/tools/sycl-compress/zstd/install/lib/libzstd.a) + target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) # 
ur_win_proxy_loader diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 9d2777e863ee1..7b42fabc85bbd 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -115,7 +115,11 @@ enum sycl_device_binary_type : uint8_t { SYCL_DEVICE_BINARY_TYPE_NONE = 0, // undetermined SYCL_DEVICE_BINARY_TYPE_NATIVE = 1, // specific to a device SYCL_DEVICE_BINARY_TYPE_SPIRV = 2, - SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3 + SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3, + SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE = 4, + SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE = 5, + SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV = 6, + SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE = 7 }; // Device binary descriptor version supported by this library. diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index beb9bae0dd0f1..cdf7d989ae545 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include @@ -167,28 +169,68 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { // it when invoking the offload wrapper job Format = static_cast(Bin->Format); + bool isCompressed = false; + switch (Format) { + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: + Format = SYCL_DEVICE_BINARY_TYPE_NONE; + isCompressed = true; + break; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE: + Format = SYCL_DEVICE_BINARY_TYPE_NATIVE; + isCompressed = true; + break; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV: + Format = SYCL_DEVICE_BINARY_TYPE_SPIRV; + isCompressed = true; + break; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE: + Format = SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; + isCompressed = true; + break; + default: + break; + } + + if (isCompressed) { + size_t DecompressedSize = 0; + char *DecompressedData = + decompressBlob(reinterpret_cast(Bin->BinaryStart), + getSize(), 
DecompressedSize); + if (!DecompressedSize) { + std::cerr << "Failed to decompress device binary image\n"; + return; + } + + this->Bin = new sycl_device_binary_struct(*Bin); + this->Bin->BinaryStart = + reinterpret_cast(DecompressedData); + this->Bin->BinaryEnd = this->Bin->BinaryStart + DecompressedSize; + } + if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) // try to determine the format; may remain "NONE" - Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); + Format = ur::getBinaryImageFormat(this->Bin->BinaryStart, getSize()); - SpecConstIDMap.init(Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); + SpecConstIDMap.init(this->Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); SpecConstDefaultValuesMap.init( - Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); - DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); - KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); - AssertUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); - ProgramMetadata.init(Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA); + this->Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); + DeviceLibReqMask.init(this->Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); + KernelParamOptInfo.init(this->Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); + AssertUsed.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); + ProgramMetadata.init(this->Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA); // Convert ProgramMetadata into the UR format for (const auto &Prop : ProgramMetadata) { ProgramMetadataUR.push_back( ur::mapDeviceBinaryPropertyToProgramMetadata(Prop)); } - ExportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); - ImportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS); - DeviceGlobals.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS); - DeviceRequirements.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS); - HostPipes.init(Bin, __SYCL_PROPERTY_SET_SYCL_HOST_PIPES); - VirtualFunctions.init(Bin, 
__SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS); + + ExportedSymbols.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); + ImportedSymbols.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS); + DeviceGlobals.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS); + DeviceRequirements.init(this->Bin, + __SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS); + HostPipes.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_HOST_PIPES); + VirtualFunctions.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS); ImageId = ImageCounter++; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 3ed9fc8aa9a08..4958d5c58fbd5 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -117,9 +117,8 @@ static ur_program_handle_t createSpirvProgram(const ContextImplPtr Context, } // TODO replace this with a new UR API function -static bool -isDeviceBinaryTypeSupported(const context &C, - ur::DeviceBinaryType Format) { +static bool isDeviceBinaryTypeSupported(const context &C, + ur::DeviceBinaryType Format) { // All formats except SYCL_DEVICE_BINARY_TYPE_SPIRV are supported. 
if (Format != SYCL_DEVICE_BINARY_TYPE_SPIRV) return true; @@ -532,21 +531,19 @@ static const char *getUrDeviceTarget(const char *URDeviceTarget) { return UR_DEVICE_BINARY_TARGET_SPIRV32; else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) return UR_DEVICE_BINARY_TARGET_SPIRV64; - else if (strcmp(URDeviceTarget, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) - return UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; - else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) + return UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) return UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; - else if (strcmp(URDeviceTarget, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == + 0) return UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0) return UR_DEVICE_BINARY_TARGET_NVPTX64; else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0) return UR_DEVICE_BINARY_TARGET_AMDGCN; - else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == - 0) + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) return "native_cpu"; // todo: define UR_DEVICE_BINARY_TARGET_NATIVE_CPU; return UR_DEVICE_BINARY_TARGET_UNKNOWN; @@ -2700,8 +2697,8 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( /*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0, ExtraProgramsToLink); ur_kernel_handle_t UrKernel{nullptr}; - Plugin->call(urKernelCreate, - BuildProgram.get(), KernelName.c_str(), &UrKernel); + Plugin->call(urKernelCreate, BuildProgram.get(), + KernelName.c_str(), &UrKernel); { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); m_MaterializedKernels[KernelName][SpecializationConsts] = UrKernel; From 
45f1e991fb51f4303fa77743ece09274338315b9 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 17 Aug 2024 16:29:48 -0700 Subject: [PATCH 03/50] Cleanup --- sycl/source/detail/device_binary_image.cpp | 89 +++++++++++-------- sycl/source/detail/device_binary_image.hpp | 16 +++- .../program_manager/program_manager.cpp | 22 ++++- .../program_manager/program_manager.hpp | 12 +-- 4 files changed, 93 insertions(+), 46 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index cdf7d989ae545..fcd183666f871 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -169,44 +169,6 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { // it when invoking the offload wrapper job Format = static_cast(Bin->Format); - bool isCompressed = false; - switch (Format) { - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: - Format = SYCL_DEVICE_BINARY_TYPE_NONE; - isCompressed = true; - break; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE: - Format = SYCL_DEVICE_BINARY_TYPE_NATIVE; - isCompressed = true; - break; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV: - Format = SYCL_DEVICE_BINARY_TYPE_SPIRV; - isCompressed = true; - break; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE: - Format = SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; - isCompressed = true; - break; - default: - break; - } - - if (isCompressed) { - size_t DecompressedSize = 0; - char *DecompressedData = - decompressBlob(reinterpret_cast(Bin->BinaryStart), - getSize(), DecompressedSize); - if (!DecompressedSize) { - std::cerr << "Failed to decompress device binary image\n"; - return; - } - - this->Bin = new sycl_device_binary_struct(*Bin); - this->Bin->BinaryStart = - reinterpret_cast(DecompressedData); - this->Bin->BinaryEnd = this->Bin->BinaryStart + DecompressedSize; - } - if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) // try to determine the format; may remain "NONE" Format = 
ur::getBinaryImageFormat(this->Bin->BinaryStart, getSize()); @@ -268,6 +230,57 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { Bin = nullptr; } +CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( + sycl_device_binary CompressedBin) + : RTDeviceBinaryImage() { + + // Decompress the binary image. + size_t DecompressedSize = 0; + size_t compressedDataSize = static_cast(CompressedBin->BinaryEnd - + CompressedBin->BinaryStart); + char *DecompressedData = + decompressBlob(reinterpret_cast(CompressedBin->BinaryStart), + compressedDataSize, DecompressedSize); + + if (!DecompressedSize) { + std::cerr << "Failed to decompress device binary image\n"; + return; + } + + Bin = new sycl_device_binary_struct(*CompressedBin); + Bin->BinaryStart = reinterpret_cast(DecompressedData); + Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; + + // Get the new format. + auto currFormat = static_cast(Bin->Format); + switch (currFormat) { + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: + currFormat = SYCL_DEVICE_BINARY_TYPE_NONE; + break; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE: + currFormat = SYCL_DEVICE_BINARY_TYPE_NATIVE; + break; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV: + currFormat = SYCL_DEVICE_BINARY_TYPE_SPIRV; + break; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE: + currFormat = SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; + break; + default: + break; + } + Bin->Format = currFormat; + + init(Bin); +} + +CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { + // De-allocate the decompressed image. 
+ delete Bin->BinaryStart; + delete Bin; + Bin = nullptr; +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 49047a04ae77c..9a376cdf6351a 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #pragma once +#include "ur_utils.hpp" #include #include #include #include #include -#include "ur_utils.hpp" #include @@ -276,6 +276,20 @@ class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { std::unique_ptr Data; }; +// Compressed device binary image. It decompresses the binary image on +// construction and stores the decompressed data as RTDeviceBinaryImage. +// Also, frees the decompressed data in destructor. +class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { +public: + CompressedRTDeviceBinaryImage(sycl_device_binary Bin); + ~CompressedRTDeviceBinaryImage() override; + + void print() const override { + RTDeviceBinaryImage::print(); + std::cerr << " COMPRESSED\n"; + } +}; + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 4958d5c58fbd5..bd8b2527de79a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1570,7 +1570,12 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { if (EntriesB == EntriesE) continue; - auto Img = std::make_unique(RawImg); + std::unique_ptr Img; + if (isDeviceImageCompressed(RawImg)) + Img = std::move(std::make_unique(RawImg)); + else + Img = std::move(std::make_unique(RawImg)); + static uint32_t SequenceID = 0; // Fill the kernel argument mask map @@ -2707,6 +2712,21 @@ ur_kernel_handle_t 
ProgramManager::getOrCreateMaterializedKernel( return UrKernel; } +// Check if device image is compressed. +inline bool +ProgramManager::isDeviceImageCompressed(sycl_device_binary Bin) const { + + auto currFormat = static_cast(Bin->Format); + + if (currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE || + currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE || + currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV || + currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE) + return true; + else + return false; +} + bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img) { return !checkDevSupportDeviceRequirements(Dev, Img).has_value(); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index f973e8043b769..cd3859afd468f 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -123,14 +123,11 @@ class ProgramManager { /// \return A pair consisting of the UR program created with the corresponding /// device code binary and a boolean that is true if the device code /// binary was found in the persistent cache and false otherwise. - std::pair - getOrCreateURProgram( + std::pair getOrCreateURProgram( const RTDeviceBinaryImage &Img, const std::vector &AllImages, - const context &Context, - const device &Device, - const std::string &CompileAndLinkOptions, - SerializedObj SpecConsts); + const context &Context, const device &Device, + const std::string &CompileAndLinkOptions, SerializedObj SpecConsts); /// Builds or retrieves from cache a program defining the kernel with given /// name. /// \param M identifies the OS module the kernel comes from (multiple OS @@ -322,6 +319,9 @@ class ProgramManager { collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, device Dev); + // Returns whether the device image is compressed or not. 
+ inline bool isDeviceImageCompressed(sycl_device_binary Bin) const; + /// The three maps below are used during kernel resolution. Any kernel is /// identified by its name. using RTDeviceBinaryImageUPtr = std::unique_ptr; From 34978f80648d1456a56bd0bb0a9fa4e22520b327 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 18 Aug 2024 10:04:06 -0700 Subject: [PATCH 04/50] Fix ZSTD Cmake dependencies --- clang/tools/clang-linker-wrapper/CMakeLists.txt | 5 ++--- clang/tools/clang-offload-wrapper/CMakeLists.txt | 7 ++----- sycl-compress/CMakeLists.txt | 10 ++++++---- sycl-compress/src/CMakeLists.txt | 4 ++-- sycl/source/CMakeLists.txt | 4 ++-- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt index 171e173eeaea3..c9c776a3c44fa 100644 --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -36,15 +36,14 @@ add_clang_tool(clang-linker-wrapper add_dependencies(clang-linker-wrapper sycl-compress) include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) -message(CHECK_START "Finding sycl-compress") -set(SYCL_COMPRESS_LIB ${CMAKE_BINARY_DIR}/lib/libsycl-compress.a) set(CLANG_LINKER_WRAPPER_LIB_DEPS clangBasic ) +#find_library(SYCL_COMPRESS_LIB sycl-compress PATHS ${CMAKE_BINARY_DIR}/lib) target_link_libraries(clang-linker-wrapper PRIVATE ${CLANG_LINKER_WRAPPER_LIB_DEPS} - ${SYCL_COMPRESS_LIB} + sycl-compress ) diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index aee5e9611a46e..d9e3b7660ea4b 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -14,15 +14,12 @@ set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS add_dependencies(clang-offload-wrapper sycl-compress) include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) -message(CHECK_START "Finding sycl-compress") 
-set(SYCL_COMPRESS_LIB ${CMAKE_BINARY_DIR}/lib/libsycl-compress.a) -set(ZSTD_LIBRARY ${CMAKE_BINARY_DIR}/tools/sycl-compress/zstd/install/lib/libzstd.a) add_dependencies(clang clang-offload-wrapper) +#find_library(SYCL_COMPRESS_LIB sycl-compress PATHS ${CMAKE_BINARY_DIR}/lib NO_DEFAULT_PATH) clang_target_link_libraries(clang-offload-wrapper PRIVATE ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} - ${SYCL_COMPRESS_LIB} - ${ZSTD_LIBRARY} + sycl-compress ) diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index 6ebfcd4c39b12..852c40fb1daaf 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -16,20 +16,22 @@ set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}) # Download and build zstd include(ExternalProject) ExternalProject_Add(zstd + DEPENDS URL https://github.com/facebook/zstd/releases/download/v1.5.6/zstd-1.5.6.tar.gz URL_HASH SHA256=8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 SOURCE_SUBDIR build/cmake - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/zstd/install -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -) + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/zstd/install -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DZSTD_BUILD_PROGRAMS=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -DZSTD_MULTITHREAD_SUPPORT=OFF + BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/zstd/install/lib/libzstd.a" + ) -set(ZSTD_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/zstd/install/lib/libzstd.a) +set(ZSTD_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/zstd/install/lib/libzstd.a CACHE PATH "ZSTD library" FORCE) set(ZSTD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/zstd/install/include) include_directories(${ZSTD_INCLUDE_DIR}) add_subdirectory(src) add_dependencies(sycl-compress zstd) -#target_link_libraries(sycl-compress ${ZSTD_LIBRARY}) +target_link_libraries(sycl-compress PRIVATE ${ZSTD_LIBRARY}) if (LLVM_BINARY_DIR) file(GLOB_RECURSE SYCL_COMPRESS_HEADERS_LIST 
CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress/*") diff --git a/sycl-compress/src/CMakeLists.txt b/sycl-compress/src/CMakeLists.txt index 8a40caeb90195..f0ccc1141220a 100644 --- a/sycl-compress/src/CMakeLists.txt +++ b/sycl-compress/src/CMakeLists.txt @@ -1,9 +1,9 @@ include(GNUInstallDirs) macro(add_syclcompress_lib target_name) - add_library(${target_name} STATIC ${ARGN}) + + add_library(${target_name} SHARED ${ARGN}) target_include_directories(${target_name} PRIVATE ${SYCL_COMPRESS_DIR}/include ${ZSTD_INCLUDE_DIR}) - target_link_libraries(${target_name} PRIVATE ${ZSTD_LIBRARY}) # if (MSVC) # target_compile_options(${target_name} PRIVATE /EHsc) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 1ea0199e7b564..030e42dd31c9e 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -69,9 +69,9 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() + # Include sycl-compress headers and link library. 
include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) - target_link_libraries(${LIB_NAME} PRIVATE ${CMAKE_BINARY_DIR}/lib/libsycl-compress.a) - target_link_libraries(${LIB_NAME} PRIVATE ${CMAKE_BINARY_DIR}/tools/sycl-compress/zstd/install/lib/libzstd.a) + target_link_libraries(${LIB_NAME} PRIVATE sycl-compress) target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) From cd64225835c10f1bdf128df17284590f395360aa Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 18 Aug 2024 11:01:40 -0700 Subject: [PATCH 05/50] Remove unwanted formatting changes --- .../ClangOffloadWrapper.cpp | 361 +++++++++--------- sycl-compress/CMakeLists.txt | 2 +- .../include/sycl-compress/sycl-compress.h | 2 +- sycl-compress/src/CMakeLists.txt | 2 +- sycl-compress/src/sycl-compress.cpp | 2 +- 5 files changed, 181 insertions(+), 188 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index c0e483f83d134..f511115758f1c 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -135,7 +135,7 @@ static cl::opt cl::value_desc("filename"), cl::cat(ClangOffloadWrapperCategory)); -static cl::opt Verbose("v", cl::desc("verbose output"), cl::init(true), +static cl::opt Verbose("v", cl::desc("verbose output"), cl::cat(ClangOffloadWrapperCategory)); static cl::list Inputs(cl::Positional, cl::OneOrMore, @@ -442,10 +442,9 @@ class BinaryWrapper { // }; StructType *getEntryTy() { if (!EntryTy) - EntryTy = - StructType::create("__tgt_offload_entry", PointerType::getUnqual(C), - PointerType::getUnqual(C), getSizeTTy(), - Type::getInt32Ty(C), Type::getInt32Ty(C)); + EntryTy = StructType::create("__tgt_offload_entry", PointerType::getUnqual(C), + PointerType::getUnqual(C), getSizeTTy(), + Type::getInt32Ty(C), Type::getInt32Ty(C)); return EntryTy; } @@ -459,9 +458,9 @@ class BinaryWrapper { // }; 
StructType *getDeviceImageTy() { if (!ImageTy) - ImageTy = StructType::create( - "__tgt_device_image", PointerType::getUnqual(C), - PointerType::getUnqual(C), getEntryPtrTy(), getEntryPtrTy()); + ImageTy = StructType::create("__tgt_device_image", PointerType::getUnqual(C), + PointerType::getUnqual(C), getEntryPtrTy(), + getEntryPtrTy()); return ImageTy; } @@ -509,8 +508,8 @@ class BinaryWrapper { { PointerType::getUnqual(C), // Name PointerType::getUnqual(C), // ValAddr - Type::getInt32Ty(C), // Type - Type::getInt64Ty(C) // ValSize + Type::getInt32Ty(C), // Type + Type::getInt64Ty(C) // ValSize }, "_pi_device_binary_property_struct"); } @@ -532,8 +531,8 @@ class BinaryWrapper { SyclPropSetTy = StructType::create( { PointerType::getUnqual(C), // Name - getSyclPropPtrTy(), // PropertiesBegin - getSyclPropPtrTy() // PropertiesEnd + getSyclPropPtrTy(), // PropertiesBegin + getSyclPropPtrTy() // PropertiesEnd }, "_pi_device_binary_property_set_struct"); } @@ -583,9 +582,9 @@ class BinaryWrapper { if (!SyclImageTy) { SyclImageTy = StructType::create( { - Type::getInt16Ty(C), // Version - Type::getInt8Ty(C), // OffloadKind - Type::getInt8Ty(C), // Format + Type::getInt16Ty(C), // Version + Type::getInt8Ty(C), // OffloadKind + Type::getInt8Ty(C), // Format PointerType::getUnqual(C), // DeviceTargetSpec PointerType::getUnqual(C), // CompileOptions PointerType::getUnqual(C), // LinkOptions @@ -593,10 +592,10 @@ class BinaryWrapper { PointerType::getUnqual(C), // ManifestEnd PointerType::getUnqual(C), // ImageStart PointerType::getUnqual(C), // ImageEnd - getEntryPtrTy(), // EntriesBegin - getEntryPtrTy(), // EntriesEnd - getSyclPropSetPtrTy(), // PropertySetBegin - getSyclPropSetPtrTy() // PropertySetEnd + getEntryPtrTy(), // EntriesBegin + getEntryPtrTy(), // EntriesEnd + getSyclPropSetPtrTy(), // PropertySetBegin + getSyclPropSetPtrTy() // PropertySetEnd }, "__tgt_device_image"); } @@ -958,7 +957,7 @@ class BinaryWrapper { } public: - MemoryBuffer 
*addELFNotes(MemoryBuffer *Buf, StringRef OriginalFileName); + MemoryBuffer *addELFNotes(MemoryBuffer *Buf, StringRef OriginalFileName); private: /// Creates binary descriptor for the given device images. Binary descriptor @@ -1406,173 +1405,175 @@ class BinaryWrapper { } }; -// The whole function body is misaligned just to simplify -// conflict resolutions with llorg. -MemoryBuffer *BinaryWrapper::addELFNotes(MemoryBuffer *Buf, - StringRef OriginalFileName) { - // Cannot add notes, if llvm-objcopy is not available. - // - // I did not find a clean way to add a new notes section into an existing - // ELF file. llvm-objcopy seems to recreate a new ELF from scratch, - // and we just try to use llvm-objcopy here. - if (ObjcopyPath.empty()) - return Buf; + // The whole function body is misaligned just to simplify + // conflict resolutions with llorg. + MemoryBuffer *BinaryWrapper::addELFNotes( + MemoryBuffer *Buf, + StringRef OriginalFileName) { + // Cannot add notes, if llvm-objcopy is not available. + // + // I did not find a clean way to add a new notes section into an existing + // ELF file. llvm-objcopy seems to recreate a new ELF from scratch, + // and we just try to use llvm-objcopy here. + if (ObjcopyPath.empty()) + return Buf; - StringRef ToolNameRef(ToolName); + StringRef ToolNameRef(ToolName); - // Helpers to emit warnings. - auto warningOS = [ToolNameRef]() -> raw_ostream & { - return WithColor::warning(errs(), ToolNameRef); - }; - auto handleErrorAsWarning = [&warningOS](Error E) { - logAllUnhandledErrors(std::move(E), warningOS()); - }; + // Helpers to emit warnings. 
+ auto warningOS = [ToolNameRef]() -> raw_ostream & { + return WithColor::warning(errs(), ToolNameRef); + }; + auto handleErrorAsWarning = [&warningOS](Error E) { + logAllUnhandledErrors(std::move(E), warningOS()); + }; - Expected> BinOrErr = - ObjectFile::createELFObjectFile(Buf->getMemBufferRef(), - /*InitContent=*/false); - if (Error E = BinOrErr.takeError()) { - consumeError(std::move(E)); - // This warning is questionable, but let it be here, - // assuming that most OpenMP offload models use ELF offload images. - warningOS() << OriginalFileName - << " is not an ELF image, so notes cannot be added to it.\n"; - return Buf; - } + Expected> BinOrErr = + ObjectFile::createELFObjectFile(Buf->getMemBufferRef(), + /*InitContent=*/false); + if (Error E = BinOrErr.takeError()) { + consumeError(std::move(E)); + // This warning is questionable, but let it be here, + // assuming that most OpenMP offload models use ELF offload images. + warningOS() << OriginalFileName + << " is not an ELF image, so notes cannot be added to it.\n"; + return Buf; + } - // If we fail to add the note section, we just pass through the original - // ELF image for wrapping. At some point we should enforce the note section - // and start emitting errors vs warnings. - endianness Endianness; - if (isa(BinOrErr->get()) || - isa(BinOrErr->get())) { - Endianness = endianness::little; - } else if (isa(BinOrErr->get()) || - isa(BinOrErr->get())) { - Endianness = endianness::big; - } else { - warningOS() << OriginalFileName - << " is an ELF image of unrecognized format.\n"; - return Buf; - } + // If we fail to add the note section, we just pass through the original + // ELF image for wrapping. At some point we should enforce the note section + // and start emitting errors vs warnings. 
+ endianness Endianness; + if (isa(BinOrErr->get()) || + isa(BinOrErr->get())) { + Endianness = endianness::little; + } else if (isa(BinOrErr->get()) || + isa(BinOrErr->get())) { + Endianness = endianness::big; + } else { + warningOS() << OriginalFileName + << " is an ELF image of unrecognized format.\n"; + return Buf; + } - // Create temporary file for the data of a new SHT_NOTE section. - // We fill it in with data and then pass to llvm-objcopy invocation - // for reading. - Twine NotesFileModel = OriginalFileName + Twine(".elfnotes.%%%%%%%.tmp"); - Expected NotesTemp = - sys::fs::TempFile::create(NotesFileModel); - if (Error E = NotesTemp.takeError()) { - handleErrorAsWarning(createFileError(NotesFileModel, std::move(E))); - return Buf; - } - TempFiles.push_back(NotesTemp->TmpName); - - // Create temporary file for the updated ELF image. - // This is an empty file that we pass to llvm-objcopy invocation - // for writing. - Twine ELFFileModel = OriginalFileName + Twine(".elfwithnotes.%%%%%%%.tmp"); - Expected ELFTemp = sys::fs::TempFile::create(ELFFileModel); - if (Error E = ELFTemp.takeError()) { - handleErrorAsWarning(createFileError(ELFFileModel, std::move(E))); - return Buf; - } - TempFiles.push_back(ELFTemp->TmpName); - - // Keep the new ELF image file to reserve the name for the future - // llvm-objcopy invocation. - std::string ELFTmpFileName = ELFTemp->TmpName; - if (Error E = ELFTemp->keep(ELFTmpFileName)) { - handleErrorAsWarning(createFileError(ELFTmpFileName, std::move(E))); - return Buf; - } + // Create temporary file for the data of a new SHT_NOTE section. + // We fill it in with data and then pass to llvm-objcopy invocation + // for reading. 
+ Twine NotesFileModel = OriginalFileName + Twine(".elfnotes.%%%%%%%.tmp"); + Expected NotesTemp = + sys::fs::TempFile::create(NotesFileModel); + if (Error E = NotesTemp.takeError()) { + handleErrorAsWarning(createFileError(NotesFileModel, std::move(E))); + return Buf; + } + TempFiles.push_back(NotesTemp->TmpName); + + // Create temporary file for the updated ELF image. + // This is an empty file that we pass to llvm-objcopy invocation + // for writing. + Twine ELFFileModel = OriginalFileName + Twine(".elfwithnotes.%%%%%%%.tmp"); + Expected ELFTemp = + sys::fs::TempFile::create(ELFFileModel); + if (Error E = ELFTemp.takeError()) { + handleErrorAsWarning(createFileError(ELFFileModel, std::move(E))); + return Buf; + } + TempFiles.push_back(ELFTemp->TmpName); - // Write notes to the *elfnotes*.tmp file. - raw_fd_ostream NotesOS(NotesTemp->FD, false); + // Keep the new ELF image file to reserve the name for the future + // llvm-objcopy invocation. + std::string ELFTmpFileName = ELFTemp->TmpName; + if (Error E = ELFTemp->keep(ELFTmpFileName)) { + handleErrorAsWarning(createFileError(ELFTmpFileName, std::move(E))); + return Buf; + } - struct NoteTy { - // Note name is a null-terminated "LLVMOMPOFFLOAD". - std::string Name; - // Note type defined in llvm/include/llvm/BinaryFormat/ELF.h. - uint32_t Type = 0; - // Each note has type-specific associated data. - std::string Desc; + // Write notes to the *elfnotes*.tmp file. + raw_fd_ostream NotesOS(NotesTemp->FD, false); - NoteTy(std::string &&Name, uint32_t Type, std::string &&Desc) - : Name(std::move(Name)), Type(Type), Desc(std::move(Desc)) {} - }; + struct NoteTy { + // Note name is a null-terminated "LLVMOMPOFFLOAD". + std::string Name; + // Note type defined in llvm/include/llvm/BinaryFormat/ELF.h. + uint32_t Type = 0; + // Each note has type-specific associated data. + std::string Desc; - // So far we emit just three notes. 
- SmallVector Notes; - // Version of the offload image identifying the structure of the ELF image. - // Version 1.0 does not have any specific requirements. - // We may come up with some structure that has to be honored by all - // offload implementations in future (e.g. to let libomptarget - // get some information from the offload image). - Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION, - OPENMP_OFFLOAD_IMAGE_VERSION); - // This is a producer identification string. We are LLVM! - Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER, - "LLVM"); - // This is a producer version. Use the same format that is used - // by clang to report the LLVM version. - Notes.emplace_back("LLVMOMPOFFLOAD", - ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION, - LLVM_VERSION_STRING + NoteTy(std::string &&Name, uint32_t Type, std::string &&Desc) + : Name(std::move(Name)), Type(Type), Desc(std::move(Desc)) {} + }; + + // So far we emit just three notes. + SmallVector Notes; + // Version of the offload image identifying the structure of the ELF image. + // Version 1.0 does not have any specific requirements. + // We may come up with some structure that has to be honored by all + // offload implementations in future (e.g. to let libomptarget + // get some information from the offload image). + Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION, + OPENMP_OFFLOAD_IMAGE_VERSION); + // This is a producer identification string. We are LLVM! + Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER, + "LLVM"); + // This is a producer version. Use the same format that is used + // by clang to report the LLVM version. + Notes.emplace_back("LLVMOMPOFFLOAD", + ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION, + LLVM_VERSION_STRING #ifdef LLVM_REVISION - " " LLVM_REVISION + " " LLVM_REVISION #endif - ); - - // Return the amount of padding required for a blob of N bytes - // to be aligned to Alignment bytes. 
- auto getPadAmount = [](uint32_t N, uint32_t Alignment) -> uint32_t { - uint32_t Mod = (N % Alignment); - if (Mod == 0) - return 0; - return Alignment - Mod; - }; - auto emitPadding = [&getPadAmount](raw_ostream &OS, uint32_t Size) { - for (uint32_t I = 0; I < getPadAmount(Size, 4); ++I) - OS << '\0'; - }; + ); + + // Return the amount of padding required for a blob of N bytes + // to be aligned to Alignment bytes. + auto getPadAmount = [](uint32_t N, uint32_t Alignment) -> uint32_t { + uint32_t Mod = (N % Alignment); + if (Mod == 0) + return 0; + return Alignment - Mod; + }; + auto emitPadding = [&getPadAmount](raw_ostream &OS, uint32_t Size) { + for (uint32_t I = 0; I < getPadAmount(Size, 4); ++I) + OS << '\0'; + }; - // Put notes into the file. - for (auto &N : Notes) { - assert(!N.Name.empty() && "We should not create notes with empty names."); - // Name must be null-terminated. - if (N.Name.back() != '\0') - N.Name += '\0'; - uint32_t NameSz = N.Name.size(); - uint32_t DescSz = N.Desc.size(); - // A note starts with three 4-byte values: - // NameSz - // DescSz - // Type - // These three fields are endian-sensitive. - support::endian::write(NotesOS, NameSz, Endianness); - support::endian::write(NotesOS, DescSz, Endianness); - support::endian::write(NotesOS, N.Type, Endianness); - // Next, we have a null-terminated Name padded to a 4-byte boundary. - NotesOS << N.Name; - emitPadding(NotesOS, NameSz); - if (DescSz == 0) - continue; - // Finally, we have a descriptor, which is an arbitrary flow of bytes. - NotesOS << N.Desc; - emitPadding(NotesOS, DescSz); - } - NotesOS.flush(); + // Put notes into the file. + for (auto &N : Notes) { + assert(!N.Name.empty() && "We should not create notes with empty names."); + // Name must be null-terminated. 
+ if (N.Name.back() != '\0') + N.Name += '\0'; + uint32_t NameSz = N.Name.size(); + uint32_t DescSz = N.Desc.size(); + // A note starts with three 4-byte values: + // NameSz + // DescSz + // Type + // These three fields are endian-sensitive. + support::endian::write(NotesOS, NameSz, Endianness); + support::endian::write(NotesOS, DescSz, Endianness); + support::endian::write(NotesOS, N.Type, Endianness); + // Next, we have a null-terminated Name padded to a 4-byte boundary. + NotesOS << N.Name; + emitPadding(NotesOS, NameSz); + if (DescSz == 0) + continue; + // Finally, we have a descriptor, which is an arbitrary flow of bytes. + NotesOS << N.Desc; + emitPadding(NotesOS, DescSz); + } + NotesOS.flush(); - // Keep the notes file. - std::string NotesTmpFileName = NotesTemp->TmpName; - if (Error E = NotesTemp->keep(NotesTmpFileName)) { - handleErrorAsWarning(createFileError(NotesTmpFileName, std::move(E))); - return Buf; - } + // Keep the notes file. + std::string NotesTmpFileName = NotesTemp->TmpName; + if (Error E = NotesTemp->keep(NotesTmpFileName)) { + handleErrorAsWarning(createFileError(NotesTmpFileName, std::move(E))); + return Buf; + } - // Run llvm-objcopy like this: - // llvm-objcopy --add-section=.note.openmp= \ + // Run llvm-objcopy like this: + // llvm-objcopy --add-section=.note.openmp= \ // // // This will add a SHT_NOTE section on top of the original ELF. @@ -1604,18 +1605,10 @@ MemoryBuffer *BinaryWrapper::addELFNotes(MemoryBuffer *Buf, return Buf; } - // Substitute the original ELF with new one. 
- ErrorOr> BufOrErr = - MemoryBuffer::getFile(ELFTmpFileName); - if (!BufOrErr) { - handleErrorAsWarning(createFileError(ELFTmpFileName, BufOrErr.getError())); - return Buf; + AutoGcBufs.emplace_back(std::move(*BufOrErr)); + return AutoGcBufs.back().get(); } - AutoGcBufs.emplace_back(std::move(*BufOrErr)); - return AutoGcBufs.back().get(); -} - llvm::raw_ostream &operator<<(llvm::raw_ostream &Out, const BinaryWrapper::Image &Img) { Out << "\n{\n"; @@ -1682,7 +1675,7 @@ template class ListArgsSequencer { /// The only constructor. /// Sz - total number of options on the command line /// Args - the cl::list objects to sequence elements of - ListArgsSequencer(size_t Sz, Tys &...Args) + ListArgsSequencer(size_t Sz, Tys &... Args) : Prevs(Args.end()...), Iters(Args.begin()...) { // make OptListIDs big enough to hold IDs of all options coming from the // command line and initialize all IDs to default class -1 diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index 852c40fb1daaf..f92a12be7696f 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -57,4 +57,4 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress COMPONENT sycl-compress ) -set(SYCL_COMPRESS_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress) \ No newline at end of file +set(SYCL_COMPRESS_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress) diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h index c00cd265c9016..ec558936259d2 100644 --- a/sycl-compress/include/sycl-compress/sycl-compress.h +++ b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -1,4 +1,4 @@ char *compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level); -char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize); \ No newline at end of file +char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize); diff --git a/sycl-compress/src/CMakeLists.txt 
b/sycl-compress/src/CMakeLists.txt index f0ccc1141220a..f49aa4354b74c 100644 --- a/sycl-compress/src/CMakeLists.txt +++ b/sycl-compress/src/CMakeLists.txt @@ -21,4 +21,4 @@ set(SOURCES sycl-compress.cpp ) -add_syclcompress_lib(sycl-compress ${SOURCES}) \ No newline at end of file +add_syclcompress_lib(sycl-compress ${SOURCES}) diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp index cb702a32243e0..c2bdc63b12ea2 100644 --- a/sycl-compress/src/sycl-compress.cpp +++ b/sycl-compress/src/sycl-compress.cpp @@ -53,4 +53,4 @@ decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { } return dstBuffer; -} \ No newline at end of file +} From d89f41bc5198bc31044f66e9770d4796335b9765 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 18 Aug 2024 11:13:52 -0700 Subject: [PATCH 06/50] More cleanup --- clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp | 2 -- sycl/source/detail/program_manager/program_manager.cpp | 8 ++++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index f511115758f1c..fe8fb4f06ec05 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1144,8 +1144,6 @@ class BinaryWrapper { << Bin->getBufferSize() << " Compressed image size:" << dstSize << "\n"; - errs() << "Image format:" << Img.Fmt << "\n"; - errs() << "Image target:" << Img.Tgt << "\n"; Fbin = addDeviceImageToModule( ArrayRef((const char *)dst, dstSize), Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 41dc7ea67c506..5641d4f443310 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -171,6 +171,14 @@ static bool 
isDeviceBinaryTypeSupported(const context &C, return "SPIR-V"; case SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE: return "LLVM IR"; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: + return "compressed none"; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE: + return "compressed native"; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV: + return "compressed SPIR-V"; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE: + return "compressed LLVM IR"; } assert(false && "Unknown device image format"); return "unknown"; From fb643e3bad4a398dbf6f61e8966bbbdefae78289 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 18 Aug 2024 15:15:28 -0700 Subject: [PATCH 07/50] Add option in clang driver to trigger compression. --- .../clang/Basic/DiagnosticDriverKinds.td | 5 ++- clang/include/clang/Driver/Options.td | 13 +++++++ clang/lib/Driver/ToolChains/Clang.cpp | 36 +++++++++++++++++++ .../ClangOffloadWrapper.cpp | 12 +++++-- sycl/test/lit.cfg.py | 2 ++ 5 files changed, 65 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index fe950ec834d2f..5663e80a567fc 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -896,4 +896,7 @@ def err_drv_triple_version_invalid : Error< def warn_missing_include_dirs : Warning< "no such include directory: '%0'">, InGroup, DefaultIgnore; -} + +def warn_sycl_compress_opt_ignored : Warning< + "CLI option %0 ignored as it can not be used without -fsycl_compress_dev_imgs">; +} \ No newline at end of file diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4b335becf9736..41dab07096991 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6981,6 +6981,19 @@ def : Flag<["-"], "fno-sycl-explicit-simd">, Flags<[Deprecated]>, Group, HelpText<"Disable SYCL explicit SIMD extension. 
(deprecated)">; +// CLI options of device image compression. +// -fsycl-compress-dev-imgs +// -fsycl-compress-level= +// -fsycl-compress-threshold= +def fsycl_compress_dev_imgs : Flag<["-"], "fsycl-compress-dev-imgs">, + HelpText<"Enables compression of device images.">; +def fsycl_compress_level_EQ : Joined<["-"], "fsycl-compress-level=">, + HelpText<"ZSTD level to compress device images with. Should be used" + "only if -fsycl-compress-dev-imgs is enabled. (Default: 10)">; +def fsycl_compress_threshold_EQ : Joined<["-"], "fsycl-compress-threshold=">, + HelpText<"Specify the minimum device image size (in bytes) for compression. Should be used" + "only if -fsycl-compress-dev-imgs is enabled. (Default: 1024)">; + // OS-specific options let Flags = [TargetSpecific] in { defm android_pad_segment : BooleanFFlag<"android-pad-segment">, Group; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 8df597de8f5ff..b0532ee8627d6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10001,6 +10001,42 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, SmallString<128> TargetTripleOpt = TT.getArchName(); bool WrapFPGADevice = false; bool FPGAEarly = false; + + // Validate and propogate CLI options related to dveice image compression. 
+ { + // -fsycl-compress-dev-imgs + bool isImgCompress = false; + if (C.getInputArgs().getLastArg(options::OPT_fsycl_compress_dev_imgs)) { + isImgCompress = true; + WrapperArgs.push_back( + C.getArgs().MakeArgString(Twine("-sycl-compress-dev-imgs"))); + } + + // -fsycl-compress-level=<> + if (Arg *A = C.getInputArgs().getLastArg( + options::OPT_fsycl_compress_level_EQ)) { + + if (!isImgCompress) + C.getDriver().Diag(diag::warn_sycl_compress_opt_ignored) + << A->getAsString(C.getInputArgs()); + else + WrapperArgs.push_back(C.getArgs().MakeArgString( + Twine("-sycl-compress-level=") + A->getValue())); + } + + // -fsycl-compress-threshold=<> + if (Arg *A = C.getInputArgs().getLastArg( + options::OPT_fsycl_compress_threshold_EQ)) { + + if (!isImgCompress) + C.getDriver().Diag(diag::warn_sycl_compress_opt_ignored) + << A->getAsString(C.getInputArgs()); + else + WrapperArgs.push_back(C.getArgs().MakeArgString( + Twine("-sycl-compress-threshold=") + A->getValue())); + } + } + if (Arg *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ)) { WrapFPGADevice = true; FPGAEarly = (A->getValue() == StringRef("early")); diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index fe8fb4f06ec05..99dd5d16757cb 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -143,6 +143,7 @@ static cl::list Inputs(cl::Positional, cl::OneOrMore, cl::cat(ClangOffloadWrapperCategory)); // CLI options for device image compression. +// TODO: Turn off this option by default. static cl::opt SYCLCompressDevImg("sycl-compress-dev-imgs", cl::init(true), cl::Optional, cl::desc("Enable device image compression using ZSTD."), @@ -153,6 +154,12 @@ static cl::opt cl::desc("ZSTD Compression level. 
Default: 10"), cl::cat(ClangOffloadWrapperCategory)); +static cl::opt + SYCLCompressThreshold("sycl-compress-threshold", cl::init(1024), + cl::Optional, + cl::desc("ZSTD Compression threshold. Default: 1024"), + cl::cat(ClangOffloadWrapperCategory)); + // Binary image formats supported by this tool. The support basically means // mapping string representation given at the command line to a value from this // enum. No format checking is performed. @@ -1111,9 +1118,10 @@ class BinaryWrapper { } else { // Don't compress if the user explicitly specifies the binary image - // format. + // format or if the image is smaller than the threshold. if (Kind != OffloadKind::SYCL || !SYCLCompressDevImg || - Img.Fmt != BinaryImageFormat::none) { + Img.Fmt != BinaryImageFormat::none || + static_cast(Bin->getBufferSize()) < SYCLCompressThreshold) { Fbin = addDeviceImageToModule( ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index ff955fa1f269e..18912f60be5d6 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -142,6 +142,8 @@ config.substitutions.append(("%sycl_triple", triple)) additional_flags = config.sycl_clang_extra_flags.split(" ") +# TODO: Remove this. 
+additional_flags.append("-fsycl-compress-dev-imgs") if config.cuda == "ON": config.available_features.add("cuda") From 151e70a1796ba55941fb68fc50d622edc234b6f3 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 18 Aug 2024 23:42:22 -0700 Subject: [PATCH 08/50] Cleanup + build fix --- .../clang/Basic/DiagnosticDriverKinds.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- .../tools/clang-linker-wrapper/CMakeLists.txt | 1 - .../clang-offload-wrapper/CMakeLists.txt | 1 - sycl-compress/CMakeLists.txt | 39 ++++++++++++++----- .../include/sycl-compress/sycl-compress.h | 2 - sycl-compress/src/CMakeLists.txt | 24 ------------ sycl-compress/src/sycl-compress.cpp | 4 +- 8 files changed, 34 insertions(+), 41 deletions(-) delete mode 100644 sycl-compress/src/CMakeLists.txt diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 5663e80a567fc..5a3d0d988c3b6 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -899,4 +899,4 @@ def warn_missing_include_dirs : Warning< def warn_sycl_compress_opt_ignored : Warning< "CLI option %0 ignored as it can not be used without -fsycl_compress_dev_imgs">; -} \ No newline at end of file +} diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b0532ee8627d6..9ad5ea6401c14 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10002,7 +10002,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, bool WrapFPGADevice = false; bool FPGAEarly = false; - // Validate and propogate CLI options related to dveice image compression. + // Validate and propogate CLI options related to device image compression. 
{ // -fsycl-compress-dev-imgs bool isImgCompress = false; diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt index 22776afa92707..37dca30061e08 100644 --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -41,7 +41,6 @@ set(CLANG_LINKER_WRAPPER_LIB_DEPS clangBasic ) -#find_library(SYCL_COMPRESS_LIB sycl-compress PATHS ${CMAKE_BINARY_DIR}/lib) target_link_libraries(clang-linker-wrapper PRIVATE ${CLANG_LINKER_WRAPPER_LIB_DEPS} diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index d9e3b7660ea4b..f99b5d8aa9465 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -17,7 +17,6 @@ include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) add_dependencies(clang clang-offload-wrapper) -#find_library(SYCL_COMPRESS_LIB sycl-compress PATHS ${CMAKE_BINARY_DIR}/lib NO_DEFAULT_PATH) clang_target_link_libraries(clang-offload-wrapper PRIVATE ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index f92a12be7696f..e042b0ec979f0 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -6,33 +6,54 @@ project (sycl-compress VERSION "${SYCL_COMPRESS_VERSION}" LANGUAGES CXX) # Setting the same version as SYCL set(CMAKE_CXX_STANDARD 17) -set(SYCL_COMPRESS_DIR ${CMAKE_CURRENT_LIST_DIR}) - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE) +set(SYCL_COMPRESS_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}) set(CMAKE_BINARY_DIR ${CMAKE_SOURCE_DIR}/lib/${CMAKE_BUILD_TYPE}) set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}) -# Download and build zstd +# Download and build zstd statically. 
+set(ZSTD_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/zstd/install) + +if (WIN32) + set(ZSTD_LIBRARY_NAME libzstd.lib) +else() + set(ZSTD_LIBRARY_NAME libzstd.a) +endif() + include(ExternalProject) ExternalProject_Add(zstd DEPENDS URL https://github.com/facebook/zstd/releases/download/v1.5.6/zstd-1.5.6.tar.gz URL_HASH SHA256=8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 SOURCE_SUBDIR build/cmake - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/zstd/install -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DZSTD_BUILD_PROGRAMS=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -DZSTD_MULTITHREAD_SUPPORT=OFF - BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/zstd/install/lib/libzstd.a" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZSTD_INSTALL_DIR} -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DZSTD_BUILD_PROGRAMS=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -DZSTD_MULTITHREAD_SUPPORT=OFF + BUILD_BYPRODUCTS "${ZSTD_INSTALL_DIR}/lib/${ZSTD_LIBRARY_NAME}" + DOWNLOAD_EXTRACT_TIMESTAMP ON ) -set(ZSTD_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/zstd/install/lib/libzstd.a CACHE PATH "ZSTD library" FORCE) -set(ZSTD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/zstd/install/include) +set(ZSTD_LIBRARY ${ZSTD_INSTALL_DIR}/lib/${ZSTD_LIBRARY_NAME}) +set(ZSTD_INCLUDE_DIR ${ZSTD_INSTALL_DIR}/include) -include_directories(${ZSTD_INCLUDE_DIR}) +# Setup sycl-compress library. Add dependency on zstd. 
+set(SOURCES + src/sycl-compress.cpp +) + +add_library(sycl-compress SHARED ${SOURCES}) -add_subdirectory(src) add_dependencies(sycl-compress zstd) +target_include_directories(sycl-compress PRIVATE ${SYCL_COMPRESS_INCLUDE_DIR}/include ${ZSTD_INCLUDE_DIR}) target_link_libraries(sycl-compress PRIVATE ${ZSTD_LIBRARY}) +# Set the location of the library installation +include(GNUInstallDirs) +install(TARGETS sycl-compress + RUNTIME DESTINATION bin COMPONENT sycl-compress + LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress +) + if (LLVM_BINARY_DIR) file(GLOB_RECURSE SYCL_COMPRESS_HEADERS_LIST CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress/*") string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}" "${LLVM_BINARY_DIR}" diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h index ec558936259d2..2f29a320ab7cf 100644 --- a/sycl-compress/include/sycl-compress/sycl-compress.h +++ b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -1,4 +1,2 @@ - - char *compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level); char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize); diff --git a/sycl-compress/src/CMakeLists.txt b/sycl-compress/src/CMakeLists.txt deleted file mode 100644 index f49aa4354b74c..0000000000000 --- a/sycl-compress/src/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -include(GNUInstallDirs) - -macro(add_syclcompress_lib target_name) - - add_library(${target_name} SHARED ${ARGN}) - target_include_directories(${target_name} PRIVATE ${SYCL_COMPRESS_DIR}/include ${ZSTD_INCLUDE_DIR}) - - # if (MSVC) - # target_compile_options(${target_name} PRIVATE /EHsc) - # endif() - - # Set the location of the library installation - install(TARGETS ${target_name} - RUNTIME DESTINATION bin COMPONENT sycl-compress - LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress - 
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress - ) -endmacro() - -set(SOURCES - sycl-compress.cpp -) - -add_syclcompress_lib(sycl-compress ${SOURCES}) diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp index c2bdc63b12ea2..dbe076bd0f8d4 100644 --- a/sycl-compress/src/sycl-compress.cpp +++ b/sycl-compress/src/sycl-compress.cpp @@ -7,7 +7,7 @@ #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -__attribute__((visibility("default"))) char * +char * compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { auto dstBufferSize = ZSTD_compressBound(srcSize); char *dstBuffer = static_cast(malloc(dstBufferSize)); @@ -25,7 +25,7 @@ compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { return dstBuffer; } -__attribute__((visibility("default"))) char * +char * decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { // Size of decompressed image can be larger than what we can allocate // on heap. In that case, we need to use streaming decompression. 
From 2983fab0edc1cd8c03bc9932990699405583abd9 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 19 Aug 2024 08:07:44 -0700 Subject: [PATCH 09/50] Fix ZSTD build on windows, RHEL --- sycl-compress/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index e042b0ec979f0..a9f3461a30d94 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -16,7 +16,7 @@ set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}) set(ZSTD_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/zstd/install) if (WIN32) - set(ZSTD_LIBRARY_NAME libzstd.lib) + set(ZSTD_LIBRARY_NAME zstd_static.lib) else() set(ZSTD_LIBRARY_NAME libzstd.a) endif() @@ -28,11 +28,11 @@ ExternalProject_Add(zstd URL_HASH SHA256=8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 SOURCE_SUBDIR build/cmake CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZSTD_INSTALL_DIR} -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DZSTD_BUILD_PROGRAMS=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -DZSTD_MULTITHREAD_SUPPORT=OFF - BUILD_BYPRODUCTS "${ZSTD_INSTALL_DIR}/lib/${ZSTD_LIBRARY_NAME}" + BUILD_BYPRODUCTS "${ZSTD_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${ZSTD_LIBRARY_NAME}" DOWNLOAD_EXTRACT_TIMESTAMP ON ) -set(ZSTD_LIBRARY ${ZSTD_INSTALL_DIR}/lib/${ZSTD_LIBRARY_NAME}) +set(ZSTD_LIBRARY ${ZSTD_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${ZSTD_LIBRARY_NAME}) set(ZSTD_INCLUDE_DIR ${ZSTD_INSTALL_DIR}/include) # Setup sycl-compress library. Add dependency on zstd. 
From 4493984f265d13af9e55f218072ee7911a9fa671 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 19 Aug 2024 09:19:27 -0700 Subject: [PATCH 10/50] Fix clang warnings and formatting --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 ++- sycl-compress/src/sycl-compress.cpp | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 5a3d0d988c3b6..ce8fddf8a3c0c 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -898,5 +898,6 @@ def warn_missing_include_dirs : Warning< "no such include directory: '%0'">, InGroup, DefaultIgnore; def warn_sycl_compress_opt_ignored : Warning< - "CLI option %0 ignored as it can not be used without -fsycl_compress_dev_imgs">; + "CLI option %0 ignored as it can not be used without -fsycl_compress_dev_imgs">, + InGroup, DefaultIgnore; } diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp index dbe076bd0f8d4..f326d24602c9b 100644 --- a/sycl-compress/src/sycl-compress.cpp +++ b/sycl-compress/src/sycl-compress.cpp @@ -7,8 +7,8 @@ #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -char * -compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { +char *compressBlob(const char *src, size_t srcSize, size_t &dstSize, + int level) { auto dstBufferSize = ZSTD_compressBound(srcSize); char *dstBuffer = static_cast(malloc(dstBufferSize)); dstSize = ZSTD_compress(static_cast(dstBuffer), dstBufferSize, @@ -25,8 +25,7 @@ compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { return dstBuffer; } -char * -decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { +char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { // Size of decompressed image can be larger than what we can allocate // on heap. 
In that case, we need to use streaming decompression. // TODO: Throw if the decompression size is too large. From dbb96a7c4f7168d47761c0dc7b03cfa23975d7af Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 19 Aug 2024 22:54:18 -0700 Subject: [PATCH 11/50] Try fixing Windows build --- sycl-compress/CMakeLists.txt | 3 --- sycl-compress/src/sycl-compress.cpp | 13 ++++++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index a9f3461a30d94..6067ed3dc287d 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -49,7 +49,6 @@ target_link_libraries(sycl-compress PRIVATE ${ZSTD_LIBRARY}) # Set the location of the library installation include(GNUInstallDirs) install(TARGETS sycl-compress - RUNTIME DESTINATION bin COMPONENT sycl-compress LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress ) @@ -77,5 +76,3 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT sycl-compress ) - -set(SYCL_COMPRESS_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress) diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp index f326d24602c9b..4d91a97672117 100644 --- a/sycl-compress/src/sycl-compress.cpp +++ b/sycl-compress/src/sycl-compress.cpp @@ -7,8 +7,11 @@ #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -char *compressBlob(const char *src, size_t srcSize, size_t &dstSize, - int level) { +#if defined(_MSC_VER) +__declspec(dllexport) +#endif +char * +compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { auto dstBufferSize = ZSTD_compressBound(srcSize); char *dstBuffer = static_cast(malloc(dstBufferSize)); dstSize = ZSTD_compress(static_cast(dstBuffer), dstBufferSize, @@ -25,7 +28,11 @@ char *compressBlob(const char *src, size_t 
srcSize, size_t &dstSize, return dstBuffer; } -char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { +#if defined(_MSC_VER) +__declspec(dllexport) +#endif +char * +decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { // Size of decompressed image can be larger than what we can allocate // on heap. In that case, we need to use streaming decompression. // TODO: Throw if the decompression size is too large. From 7d7edc6c17263b289d98a24d5e092b1101112424 Mon Sep 17 00:00:00 2001 From: Agarwal Date: Sun, 25 Aug 2024 23:12:42 -0700 Subject: [PATCH 12/50] Fix linkage error while windows build --- sycl-compress/include/sycl-compress/sycl-compress.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h index 2f29a320ab7cf..a0c2f79731fed 100644 --- a/sycl-compress/include/sycl-compress/sycl-compress.h +++ b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -1,2 +1,10 @@ +#if defined(_MSC_VER) +__declspec(dllexport) +#endif char *compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level); + + +#if defined(_MSC_VER) +__declspec(dllexport) +#endif char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize); From f0aca25a85ef2aff55e6d259ea859f6b502e41ba Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 26 Aug 2024 11:04:18 -0700 Subject: [PATCH 13/50] Fix include_directory for sycl-compress --- sycl-compress/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index 6067ed3dc287d..14c2f1c1b16cb 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -43,7 +43,8 @@ set(SOURCES add_library(sycl-compress SHARED ${SOURCES}) add_dependencies(sycl-compress zstd) -target_include_directories(sycl-compress PRIVATE ${SYCL_COMPRESS_INCLUDE_DIR}/include ${ZSTD_INCLUDE_DIR}) 
+target_include_directories(sycl-compress PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include ${ZSTD_INCLUDE_DIR}) +target_include_directories(sycl-compress PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) target_link_libraries(sycl-compress PRIVATE ${ZSTD_LIBRARY}) # Set the location of the library installation From 758723c6030215722f4bf1fee16811e7c4cdae90 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 26 Aug 2024 16:43:17 -0700 Subject: [PATCH 14/50] Fix formatting. Remove redundant incude_directory in CMakeFiles --- clang/tools/clang-linker-wrapper/CMakeLists.txt | 1 - clang/tools/clang-offload-wrapper/CMakeLists.txt | 2 -- sycl-compress/include/sycl-compress/sycl-compress.h | 6 ++++-- sycl/source/CMakeLists.txt | 1 - 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt index 37dca30061e08..9e4fd7a5b4bfe 100644 --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -35,7 +35,6 @@ add_clang_tool(clang-linker-wrapper ) add_dependencies(clang-linker-wrapper sycl-compress) -include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) set(CLANG_LINKER_WRAPPER_LIB_DEPS clangBasic diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index f99b5d8aa9465..1e3bb9ef0ca08 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -13,8 +13,6 @@ set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS ) add_dependencies(clang-offload-wrapper sycl-compress) -include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) - add_dependencies(clang clang-offload-wrapper) clang_target_link_libraries(clang-offload-wrapper diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h index a0c2f79731fed..950ca12963781 100644 --- 
a/sycl-compress/include/sycl-compress/sycl-compress.h +++ b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -1,10 +1,12 @@ #if defined(_MSC_VER) __declspec(dllexport) #endif -char *compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level); +char * +compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level); #if defined(_MSC_VER) __declspec(dllexport) #endif -char *decompressBlob(const char *src, size_t srcSize, size_t &dstSize); +char * +decompressBlob(const char *src, size_t srcSize, size_t &dstSize); diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index d878b753d25c8..ef10767cceb89 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -70,7 +70,6 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) endif() # Include sycl-compress headers and link library. - include_directories(${CMAKE_BINARY_DIR}/include/sycl-compress/) target_link_libraries(${LIB_NAME} PRIVATE sycl-compress) target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) From 7282eec97176f60ad5580c2da9e064d5bcd94287 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 26 Aug 2024 17:55:35 -0700 Subject: [PATCH 15/50] Another test --- sycl/source/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index ef10767cceb89..b1ed1afb81b3a 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -69,7 +69,8 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() - # Include sycl-compress headers and link library. + # Add dependency on sycl-compress. 
+ add_dependencies(${LIB_NAME} sycl-compress) target_link_libraries(${LIB_NAME} PRIVATE sycl-compress) target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) From eda727edf6ec24ee0b87c431b9515db880ac8f56 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 26 Aug 2024 18:25:28 -0700 Subject: [PATCH 16/50] Another attempt at fixing win build --- sycl-compress/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index 14c2f1c1b16cb..7ce6fe7972c82 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -38,13 +38,14 @@ set(ZSTD_INCLUDE_DIR ${ZSTD_INSTALL_DIR}/include) # Setup sycl-compress library. Add dependency on zstd. set(SOURCES src/sycl-compress.cpp + include/sycl-compress/sycl-compress.h ) add_library(sycl-compress SHARED ${SOURCES}) add_dependencies(sycl-compress zstd) -target_include_directories(sycl-compress PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include ${ZSTD_INCLUDE_DIR}) -target_include_directories(sycl-compress PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_include_directories(sycl-compress PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include ${ZSTD_INCLUDE_DIR}) +target_include_directories(sycl-compress PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) target_link_libraries(sycl-compress PRIVATE ${ZSTD_LIBRARY}) # Set the location of the library installation From 94a98c9899be823387f87bed1eee9140422d17d5 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 27 Aug 2024 09:28:45 -0700 Subject: [PATCH 17/50] Explicitly include sycl-compress headers --- sycl/source/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index b1ed1afb81b3a..1d530bdcb9c71 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -70,6 +70,10 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) endif() # Add dependency on sycl-compress. 
+ if (NOT DEFINED LLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR) + message (FATAL_ERROR "Undefined LLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR variable: Must be set when using sycl-compress") + endif() + include_directories(${LLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR}/include) add_dependencies(${LIB_NAME} sycl-compress) target_link_libraries(${LIB_NAME} PRIVATE sycl-compress) From 6cc23ec310e8506a41dce5942e14cbbc046f642d Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 27 Aug 2024 15:58:20 -0700 Subject: [PATCH 18/50] Reuse context in sycl-compress; Pass unique_ptr to caller instead of raw pointer. --- .../ClangOffloadWrapper.cpp | 14 ++- sycl-compress/CMakeLists.txt | 1 + .../include/sycl-compress/sycl-compress.h | 63 +++++++++-- sycl-compress/src/sycl-compress.cpp | 107 +++++++++++++----- sycl/source/detail/device_binary_image.cpp | 12 +- sycl/source/detail/device_binary_image.hpp | 3 + 6 files changed, 151 insertions(+), 49 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 99dd5d16757cb..d696550ac2e5d 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1132,13 +1132,15 @@ class BinaryWrapper { errs() << " Compressing device image\n"; size_t dstSize; - void *dst = compressBlob(Bin->getBufferStart(), Bin->getBufferSize(), - dstSize, SYCLCompressLevel); + auto CompressedBuffer = + std::move(sycl_compress::ZSTDCompressor::CompressBlob( + Bin->getBufferStart(), Bin->getBufferSize(), dstSize, + SYCLCompressLevel)); - if (!dstSize) { + if (sycl_compress::ZSTDCompressor::GetLastError()) { if (Verbose) { - errs() << " Compression failed with error:" << (char *)dst - << "\n"; + errs() << " Compression failed with error:" + << (char *)CompressedBuffer.get() << "\n"; errs() << " Falling back to uncompressed image\n"; } @@ -1153,7 +1155,7 @@ class BinaryWrapper { << " Compressed image size:" << 
dstSize << "\n"; Fbin = addDeviceImageToModule( - ArrayRef((const char *)dst, dstSize), + ArrayRef((const char *)CompressedBuffer.get(), dstSize), Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, Img.Tgt); diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index 7ce6fe7972c82..83e511d5588c9 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -44,6 +44,7 @@ set(SOURCES add_library(sycl-compress SHARED ${SOURCES}) add_dependencies(sycl-compress zstd) +target_compile_definitions(sycl-compress PRIVATE SYCL_COMPRESS_BUILD) target_include_directories(sycl-compress PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include ${ZSTD_INCLUDE_DIR}) target_include_directories(sycl-compress PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) target_link_libraries(sycl-compress PRIVATE ${ZSTD_LIBRARY}) diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h index 950ca12963781..e2f7fe8043f18 100644 --- a/sycl-compress/include/sycl-compress/sycl-compress.h +++ b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -1,12 +1,57 @@ -#if defined(_MSC_VER) -__declspec(dllexport) -#endif -char * -compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level); +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +#pragma once + +#include +namespace sycl_compress { -#if defined(_MSC_VER) -__declspec(dllexport) +// Singleton class to handle ZSTD compression and decompression. +class +#ifdef _WIN32 +#ifdef SYCL_COMPRESS_BUILD + __declspec(dllexport) // When building sycl-compress +#else + __declspec(dllimport) // When using sycl-compress headers in dependencies. 
+#endif #endif -char * -decompressBlob(const char *src, size_t srcSize, size_t &dstSize); + ZSTDCompressor { +private: + ZSTDCompressor(); + ~ZSTDCompressor(); + + ZSTDCompressor(const ZSTDCompressor &) = delete; + ZSTDCompressor &operator=(const ZSTDCompressor &) = delete; + + // Get the singleton instance of the ZSTDCompressor class. + static ZSTDCompressor &GetSingletonInstance(); + + // Public APIs +public: + // Return 0 is last (de)compression was successful, otherwise return error + // code. + static int GetLastError(); + + // Returns a string representation of the error code. + // If the eror code is 0, it returns an empty string. + static std::string GetErrorString(int code); + + // Blob (de)compression do not assume format/structure of the input buffer. + static std::unique_ptr CompressBlob(const char *src, size_t srcSize, + size_t &dstSize, int level); + + static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, + size_t &dstSize); + + // Data fields +private: + int m_lastError; + // ZSTD context. Reusing ZSTD context speeds up subsequent (de)compression. + // Storing as void* to avoid including ZSTD headers in this file. + void *m_ZSTD_compression_ctx; + void *m_ZSTD_decompression_ctx; +}; +} // namespace sycl_compress \ No newline at end of file diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp index 4d91a97672117..2bafe649c6572 100644 --- a/sycl-compress/src/sycl-compress.cpp +++ b/sycl-compress/src/sycl-compress.cpp @@ -1,3 +1,8 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// #include #include @@ -7,32 +12,64 @@ #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -#if defined(_MSC_VER) -__declspec(dllexport) -#endif -char * -compressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { - auto dstBufferSize = ZSTD_compressBound(srcSize); - char *dstBuffer = static_cast(malloc(dstBufferSize)); - dstSize = ZSTD_compress(static_cast(dstBuffer), dstBufferSize, - static_cast(src), srcSize, level); +namespace sycl_compress { - // In case of compression error, return the error message and set dstSize to - // 0. - if (ZSTD_isError(dstSize)) { - std::cerr << "Error: " << ZSTD_getErrorName(dstSize) << "\n"; - strncpy(dstBuffer, ZSTD_getErrorName(dstSize), dstBufferSize); - dstSize = 0; +// Singleton instance of the ZSTDCompressor class. +ZSTDCompressor &ZSTDCompressor::GetSingletonInstance() { + static ZSTDCompressor instance; + return instance; +} + +// Initialize ZSTD context and error code. +ZSTDCompressor::ZSTDCompressor() { + m_ZSTD_compression_ctx = static_cast(ZSTD_createCCtx()); + m_ZSTD_decompression_ctx = static_cast(ZSTD_createDCtx()); + + if (!m_ZSTD_compression_ctx || !m_ZSTD_decompression_ctx) { + std::cerr << "Error creating ZSTD contexts. \n"; } - return dstBuffer; + m_lastError = 0; } -#if defined(_MSC_VER) -__declspec(dllexport) -#endif -char * -decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { +// Free ZSTD contexts. +ZSTDCompressor::~ZSTDCompressor() { + ZSTD_freeCCtx(static_cast(m_ZSTD_compression_ctx)); + ZSTD_freeDCtx(static_cast(m_ZSTD_decompression_ctx)); +} + +std::unique_ptr ZSTDCompressor::CompressBlob(const char *src, + size_t srcSize, + size_t &dstSize, int level) { + + auto &instance = GetSingletonInstance(); + + // Get maximum size of the compressed buffer and allocate it. 
+ auto dstBufferSize = ZSTD_compressBound(srcSize); + auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); + + // Compress the input buffer. + dstSize = ZSTD_compressCCtx( + static_cast(instance.m_ZSTD_compression_ctx), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize, level); + + // Store the error code if compression failed. + if (ZSTD_isError(dstSize)) + instance.m_lastError = dstSize; + else + instance.m_lastError = 0; + + // Pass ownership of the buffer to the caller. + return std::move(dstBuffer); +} + +std::unique_ptr ZSTDCompressor::DecompressBlob(const char *src, + size_t srcSize, + size_t &dstSize) { + + auto &instance = GetSingletonInstance(); + // Size of decompressed image can be larger than what we can allocate // on heap. In that case, we need to use streaming decompression. // TODO: Throw if the decompression size is too large. @@ -40,23 +77,37 @@ decompressBlob(const char *src, size_t srcSize, size_t &dstSize) { if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { + std::cerr << "Error determining size of uncompressed data\n"; - std::cerr << "Error: " << ZSTD_getErrorName(dstBufferSize) << "\n"; dstSize = 0; + instance.m_lastError = dstBufferSize; return nullptr; } - char *dstBuffer = static_cast(malloc(dstBufferSize)); - dstSize = ZSTD_decompress(static_cast(dstBuffer), dstBufferSize, - static_cast(src), srcSize); + // Allocate buffer for decompressed data. + auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); + + dstSize = ZSTD_decompressDCtx( + static_cast(instance.m_ZSTD_decompression_ctx), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize); // In case of decompression error, return the error message and set dstSize to // 0. 
if (ZSTD_isError(dstSize)) { - std::cerr << "Error: " << ZSTD_getErrorName(dstSize) << "\n"; - strncpy(dstBuffer, ZSTD_getErrorName(dstSize), dstBufferSize); + instance.m_lastError = dstSize; dstSize = 0; } - return dstBuffer; + // Pass ownership of the buffer to the caller. + return std::move(dstBuffer); +} + +int ZSTDCompressor::GetLastError() { + return GetSingletonInstance().m_lastError; +} + +std::string ZSTDCompressor::GetErrorString(int code) { + return ZSTD_getErrorName(code); } +} // namespace sycl_compress \ No newline at end of file diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index fcd183666f871..9ec8b231c8347 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -238,9 +238,9 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( size_t DecompressedSize = 0; size_t compressedDataSize = static_cast(CompressedBin->BinaryEnd - CompressedBin->BinaryStart); - char *DecompressedData = - decompressBlob(reinterpret_cast(CompressedBin->BinaryStart), - compressedDataSize, DecompressedSize); + m_DecompressedData = std::move(sycl_compress::ZSTDCompressor::DecompressBlob( + reinterpret_cast(CompressedBin->BinaryStart), + compressedDataSize, DecompressedSize)); if (!DecompressedSize) { std::cerr << "Failed to decompress device binary image\n"; @@ -248,7 +248,8 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( } Bin = new sycl_device_binary_struct(*CompressedBin); - Bin->BinaryStart = reinterpret_cast(DecompressedData); + Bin->BinaryStart = + reinterpret_cast(m_DecompressedData.get()); Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; // Get the new format. @@ -275,8 +276,7 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( } CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { - // De-allocate the decompressed image. - delete Bin->BinaryStart; + // De-allocate device binary struct. 
delete Bin; Bin = nullptr; } diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 9a376cdf6351a..95b7ac78c04f0 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -288,6 +288,9 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { RTDeviceBinaryImage::print(); std::cerr << " COMPRESSED\n"; } + +private: + std::unique_ptr m_DecompressedData; }; } // namespace detail From 4297d5ff3ca0b9facab986c8104e61592a94f9b4 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 27 Aug 2024 17:10:49 -0700 Subject: [PATCH 19/50] Add unit tests and performance tests for sycl-compress --- sycl-compress/CMakeLists.txt | 5 + .../include/sycl-compress/sycl-compress.h | 4 +- sycl-compress/src/sycl-compress.cpp | 2 +- .../unit-test/functional/CMakeLists.txt | 30 ++ .../unit-test/functional/api-test.cpp | 88 ++++++ .../perf/performance-measurement.cpp | 268 ++++++++++++++++++ 6 files changed, 394 insertions(+), 3 deletions(-) create mode 100644 sycl-compress/unit-test/functional/CMakeLists.txt create mode 100644 sycl-compress/unit-test/functional/api-test.cpp create mode 100644 sycl-compress/unit-test/perf/performance-measurement.cpp diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt index 83e511d5588c9..4fda495d49669 100644 --- a/sycl-compress/CMakeLists.txt +++ b/sycl-compress/CMakeLists.txt @@ -79,3 +79,8 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT sycl-compress ) + +# sycl-compress tests can be run using the check-sycl-compress target +add_custom_target(check-sycl-compress) +# Add tests +add_subdirectory(unit-test/functional) diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h index e2f7fe8043f18..0f8cf543a8771 100644 --- a/sycl-compress/include/sycl-compress/sycl-compress.h +++ 
b/sycl-compress/include/sycl-compress/sycl-compress.h @@ -36,7 +36,7 @@ class static int GetLastError(); // Returns a string representation of the error code. - // If the eror code is 0, it returns an empty string. + // If the error code is 0, it returns "No error detected". static std::string GetErrorString(int code); // Blob (de)compression do not assume format/structure of the input buffer. @@ -54,4 +54,4 @@ class void *m_ZSTD_compression_ctx; void *m_ZSTD_decompression_ctx; }; -} // namespace sycl_compress \ No newline at end of file +} // namespace sycl_compress diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp index 2bafe649c6572..df2c834020775 100644 --- a/sycl-compress/src/sycl-compress.cpp +++ b/sycl-compress/src/sycl-compress.cpp @@ -110,4 +110,4 @@ int ZSTDCompressor::GetLastError() { std::string ZSTDCompressor::GetErrorString(int code) { return ZSTD_getErrorName(code); } -} // namespace sycl_compress \ No newline at end of file +} // namespace sycl_compress diff --git a/sycl-compress/unit-test/functional/CMakeLists.txt b/sycl-compress/unit-test/functional/CMakeLists.txt new file mode 100644 index 0000000000000..3fd60be6d91f2 --- /dev/null +++ b/sycl-compress/unit-test/functional/CMakeLists.txt @@ -0,0 +1,30 @@ +# if (NOT EXISTS ${XPTI_DIR}) +# message (FATAL_ERROR "Undefined XPTI_DIR variable: Must be set for tests to work!") +# endif() +# include_directories(${XPTI_DIR}/include) + +add_executable(SYCLCompressUnitTests EXCLUDE_FROM_ALL + api-test.cpp +) + +# Silence warnings from GTest +target_include_directories(SYCLCompressUnitTests SYSTEM PRIVATE + ${gtest_SOURCE_DIR}/include + ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include +) + +add_dependencies(SYCLCompressUnitTests sycl-compress) + +target_link_libraries(SYCLCompressUnitTests PRIVATE + llvm_gtest + llvm_gtest_main + sycl-compress + LLVMSupport +) + +add_test(NAME SYCLCompressTests COMMAND SYCLCompressUnitTests) + 
+add_custom_target(check-sycl-compress-unittest COMMAND + $/SYCLCompressUnitTests) +add_dependencies(check-sycl-compress-unittest SYCLCompressUnitTests) +add_dependencies(check-sycl-compress check-sycl-compress-unittest) diff --git a/sycl-compress/unit-test/functional/api-test.cpp b/sycl-compress/unit-test/functional/api-test.cpp new file mode 100644 index 0000000000000..f6c81009b2adf --- /dev/null +++ b/sycl-compress/unit-test/functional/api-test.cpp @@ -0,0 +1,88 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +#include "sycl-compress/sycl-compress.h" + +#include +#include + +using namespace sycl_compress; +class syclCompressCorrectnessTest : public ::testing::Test {}; + +// Simple (de)compression of a string +TEST_F(syclCompressCorrectnessTest, CompressionTest) { + + std::string input = "Hello, World!"; + size_t compressedSize = 0; + auto compressedData = ZSTDCompressor::CompressBlob( + input.c_str(), input.size(), compressedSize, 1); + + ASSERT_NE(compressedData, nullptr); + ASSERT_GT(compressedSize, 0); + + size_t decompressedSize = 0; + auto decompressedData = ZSTDCompressor::DecompressBlob( + compressedData.get(), compressedSize, decompressedSize); + + ASSERT_NE(decompressedData, nullptr); + ASSERT_GT(decompressedSize, 0); + + std::string decompressedStr(decompressedData.get(), decompressedSize); + ASSERT_EQ(input, decompressedStr); +} + +// Test getting error code and error string. +// Intentionally give incorrect input to decompress +// to trigger an error. 
+TEST_F(syclCompressCorrectnessTest, NegativeErrorTest) { + std::string input = "Hello, World!"; + size_t decompressedSize = 0; + auto compressedData = ZSTDCompressor::DecompressBlob( + input.c_str(), input.size(), decompressedSize); + + int errorCode = ZSTDCompressor::GetLastError(); + ASSERT_NE(errorCode, 0); + + std::string errorString = ZSTDCompressor::GetErrorString(errorCode); + ASSERT_NE(errorString, "No error detected"); +} + +// Test that the error code is 0 after a successful (de)compression. +TEST_F(syclCompressCorrectnessTest, PositiveErrorTest) { + std::string input = "Hello, World!"; + [[maybe_unused]] size_t compressedSize = 0; + [[maybe_unused]] auto compressedData = ZSTDCompressor::CompressBlob( + input.c_str(), input.size(), compressedSize, 1); + + int errorCode = ZSTDCompressor::GetLastError(); + ASSERT_EQ(errorCode, 0); + + std::string errorString = ZSTDCompressor::GetErrorString(errorCode); + ASSERT_EQ(errorString, "No error detected"); +} + +// Test passing empty input to (de)compress. +// There should be no error and the output should be empty. 
+TEST_F(syclCompressCorrectnessTest, EmptyInputTest) { + std::string input = ""; + size_t compressedSize = 0; + auto compressedData = ZSTDCompressor::CompressBlob( + input.c_str(), input.size(), compressedSize, 1); + + ASSERT_NE(compressedData, nullptr); + ASSERT_GT(compressedSize, 0); + ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); + + size_t decompressedSize = 0; + auto decompressedData = ZSTDCompressor::DecompressBlob( + compressedData.get(), compressedSize, decompressedSize); + + ASSERT_NE(decompressedData, nullptr); + ASSERT_EQ(decompressedSize, 0); + ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); + + std::string decompressedStr(decompressedData.get(), decompressedSize); + ASSERT_EQ(input, decompressedStr); +} diff --git a/sycl-compress/unit-test/perf/performance-measurement.cpp b/sycl-compress/unit-test/perf/performance-measurement.cpp new file mode 100644 index 0000000000000..1ca740ec1de57 --- /dev/null +++ b/sycl-compress/unit-test/perf/performance-measurement.cpp @@ -0,0 +1,268 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +// Test to measure performance of compress/decompression using sycl-compress +// library. This is not run by default in the test suite. + +// Takes input the dataset of SPIRV files and (de)compresses them using ZSTD. +// Stores the compression, decompression time in a CSV file. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUM_WORKLOADS 1 +#define MAX_WORKLOAD_SIZE 1024 * 1024 * 100 // 100 MB +#define ZSTD_MIN_COMPRESSION_LEVEL 1 +#define ZSTD_MAX_COMPRESSION_LEVEL 22 + +// Generate a random buffer of data with size in the range [1, +// MAX_WORKLOAD_SIZE] Return the buffer and its size (in workloadSize) +const char *GenerateRandonWorkload(size_t &workloadSize) { + + // Get randon size in the range [1, MAX_WORKLOAD_SIZE] + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(1, MAX_WORKLOAD_SIZE); + workloadSize = static_cast(dis(gen)); + + // Allocate heap buffer. + char *wokload = static_cast(malloc(workloadSize)); + + // Populate buffer with random data. + std::generate(wokload, wokload + workloadSize, + [&]() { return static_cast(dis(gen)); }); + + return wokload; +} + +// Compress workload using ZSTD and the supplied compression level. +// Returns the time taken to compress the workload and the compressed size. +std::chrono::nanoseconds CompressWorkload(const char *workload, + size_t workloadSize, int level, + char *&compressedData, + size_t &compressedSize) { + auto start = std::chrono::high_resolution_clock::now(); + char *compressed = + compressBlob(workload, workloadSize, compressedSize, level); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds duration = + std::chrono::duration_cast(end - start); + compressedData = compressed; + return duration; +} + +// Decompress workload using ZSTD. +// Returns the time taken to decompress the workload. 
+std::chrono::nanoseconds DecompressWorkload(const char *compressedData, + size_t compressedSize, + char *&decompressedData, + size_t &decompressedSize) { + auto start = std::chrono::high_resolution_clock::now(); + char *decompressed = + decompressBlob(compressedData, compressedSize, decompressedSize); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds duration = + std::chrono::duration_cast(end - start); + decompressedData = decompressed; + return duration; +} + +// Measure the performance of compressing and decompressing random workloads +// NUM_WORKLOADS times at a give compression level. +// Returns a vector of vectors where each inner vector contains the time taken +// to compress and decompress a workload, workload size, and the compressed +// size. +std::vector> MeasurePerformance(int level) { + std::vector> results; + for (int i = 0; i < NUM_WORKLOADS; i++) { + + // Generate random workload. + size_t workloadSize; + const char *workload = GenerateRandonWorkload(workloadSize); + + // Compress. + size_t compressedSize; + char *compressedData; + auto compressDuration = CompressWorkload(workload, workloadSize, level, + compressedData, compressedSize); + + // Decompress. + size_t decompressedSize; + char *decompressedData; + auto decompressDuration = DecompressWorkload( + compressedData, compressedSize, decompressedData, decompressedSize); + + // Check the size of the decompressed data is same as the original workload. 
+ if (workloadSize != decompressedSize) { + std::cerr + << "Error: Decompressed size is not same as original workload size\n"; + std::cerr << "Workload size: " << workloadSize + << " Decompressed size: " << decompressedSize << "\n"; + exit(1); + } + + // Save results + results.push_back({static_cast(compressDuration.count()), + static_cast(decompressDuration.count()), + static_cast(workloadSize), + static_cast(compressedSize)}); + + free(const_cast(workload)); + free(compressedData); + free(decompressedData); + } + return results; +} + +// Run workloads for different compression levels between +// ZSTD_MIN_COMPRESSION_LEVEL and ZSTD_MAX_COMPRESSION_LEVEL. Saves the result +// in a CSV file, with a user-supplied name, with the following columns: +// Compression level | Workload size | Compressed size | Compress duration | +// Decompress duration. +void RunWorkloads(const std::string &outputFile) { + + // Write results to a CSV file. + // Clear the file if it already exists. + std::ofstream file; + file.open(outputFile, std::ofstream::out | std::ofstream::trunc); + + // Write header. + file << "Compression level,Workload size,Compressed size,Compress duration," + "Decompress duration\n"; + + try { + for (int level = ZSTD_MIN_COMPRESSION_LEVEL; + level <= ZSTD_MAX_COMPRESSION_LEVEL; level++) { + + std::cout << "Running workloads for compression level: " << level << "\n"; + auto levelResults = MeasurePerformance(level); + for (const auto &result : levelResults) { + file << level << "," << result[2] << "," << result[3] << "," + << result[0] << "," << result[1] << "\n"; + } + file.flush(); + } + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << "\n"; + } + + file.close(); +} + +// Takes a full file path as input, loads it into a buffer and (de)compress it +// with different levels. Returns a vector of vectors containing the +// (de)compression time, workload size, and compressed size, for each level. 
+std::vector> +MeasurePerformanceOfFileCompression(const std::string &filePath) { + std::vector> results; + std::ifstream file(filePath, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + std::cerr << "Error: Could not open file: " << filePath << "\n"; + exit(1); + } + + // Get file size. + size_t fileSize = file.tellg(); + file.seekg(0, std::ios::beg); + + // Allocate buffer to hold file data. + char *fileData = static_cast(malloc(fileSize)); + file.read(fileData, fileSize); + file.close(); + + for (int level = ZSTD_MIN_COMPRESSION_LEVEL; + level <= ZSTD_MAX_COMPRESSION_LEVEL; level++) { + std::cout << "Running workloads for compression level: " << level << "\n"; + + // Compress. + size_t compressedSize; + char *compressedData; + auto compressDuration = CompressWorkload(fileData, fileSize, level, + compressedData, compressedSize); + + // Decompress. + size_t decompressedSize; + char *decompressedData; + auto decompressDuration = DecompressWorkload( + compressedData, compressedSize, decompressedData, decompressedSize); + + // Check the size of the decompressed data is same as the original workload. + if (fileSize != decompressedSize) { + std::cerr + << "Error: Decompressed size is not same as original workload size\n"; + std::cerr << "Workload size: " << fileSize + << " Decompressed size: " << decompressedSize << "\n"; + exit(1); + } + assert(level >= 0); + results.push_back({static_cast(level), fileSize, compressedSize, + static_cast(compressDuration.count()), + static_cast(decompressDuration.count())}); + free(compressedData); + free(decompressedData); + } + + free(fileData); + return results; +} + +// Given a directory and output file name, iterate over all files in the +// directory with extension .spv or .spirv and compress/decompress them with +// different levels. Save the results in a CSV file. 
+void RunWorkloadsForFiles(const std::string &directory, + const std::string &outputFile) { + + // Check validity of the input directory path and output file. + if (!std::filesystem::exists(directory)) { + std::cerr << "Error: Directory does not exist: " << directory << "\n"; + exit(1); + } + + // Write results to a CSV file. + // Clear the file if it already exists. + std::ofstream file; + file.open(outputFile, std::ofstream::out | std::ofstream::trunc); + + // Write header. + file << "FileName, Compression level,Workload size,Compressed size,Compress " + "duration," + "Decompress duration\n"; + + for (const auto &entry : std::filesystem::directory_iterator(directory)) { + std::string filePath = entry.path().string(); + if (filePath.find(".spv") != std::string::npos || + filePath.find(".spirv") != std::string::npos) { + std::cout << "Running workloads for file: " << filePath << "\n"; + auto results = MeasurePerformanceOfFileCompression(filePath); + + for (const auto &result : results) { + + file << filePath << "," << result[0] << "," << result[1] << "," + << result[2] << "," << result[3] << "," << result[4] << "\n"; + } + file.flush(); + } + } + + file.close(); +} + +int main(int argc, char *argv[]) { + if (argc != 3) { + std::cerr << "Usage: " << argv[0] << " \n"; + return 1; + } + + RunWorkloadsForFiles(argv[2], argv[1]); + return 0; +} From 71abfce67dbf8650721f32e3f707ed5098c6dd70 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 31 Aug 2024 15:07:58 -0700 Subject: [PATCH 20/50] Reuse already existing clang options for compression. 
Use LLVMSupport instead of sycl-compress in ClangOffloadWrapper --- .../clang/Basic/DiagnosticDriverKinds.td | 4 +- clang/include/clang/Driver/Options.td | 13 -- clang/lib/Driver/ToolChains/Clang.cpp | 26 +--- .../tools/clang-linker-wrapper/CMakeLists.txt | 3 - .../ClangLinkerWrapper.cpp | 2 - .../clang-offload-wrapper/CMakeLists.txt | 3 +- .../ClangOffloadWrapper.cpp | 121 ++++++------------ 7 files changed, 49 insertions(+), 123 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 192bbd9adca43..ebafae0a2262d 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -896,7 +896,7 @@ def err_drv_triple_version_invalid : Error< def warn_missing_include_dirs : Warning< "no such include directory: '%0'">, InGroup, DefaultIgnore; -def warn_sycl_compress_opt_ignored : Warning< - "CLI option %0 ignored as it can not be used without -fsycl_compress_dev_imgs">, +def warn_compress_opt_ignored : Warning< + "CLI option %0 ignored as it can not be used without -offload-compress">, InGroup, DefaultIgnore; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 41dab07096991..4b335becf9736 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6981,19 +6981,6 @@ def : Flag<["-"], "fno-sycl-explicit-simd">, Flags<[Deprecated]>, Group, HelpText<"Disable SYCL explicit SIMD extension. (deprecated)">; -// CLI options of device image compression. -// -fsycl-compress-dev-imgs -// -fsycl-compress-level= -// -fsycl-compress-threshold= -def fsycl_compress_dev_imgs : Flag<["-"], "fsycl-compress-dev-imgs">, - HelpText<"Enables compression of device images.">; -def fsycl_compress_level_EQ : Joined<["-"], "fsycl-compress-level=">, - HelpText<"ZSTD level to compress device images with. Should be used" - "only if -fsycl-compress-dev-imgs is enabled. 
(Default: 10)">; -def fsycl_compress_threshold_EQ : Joined<["-"], "fsycl-compress-threshold=">, - HelpText<"Specify the minimum device image size (in bytes) for compression. Should be used" - "only if -fsycl-compress-dev-imgs is enabled. (Default: 1024)">; - // OS-specific options let Flags = [TargetSpecific] in { defm android_pad_segment : BooleanFFlag<"android-pad-segment">, Group; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6013cf6cd13ed..ce605f6305cd2 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10002,36 +10002,24 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, // Validate and propogate CLI options related to device image compression. { - // -fsycl-compress-dev-imgs + // -offload-compress bool isImgCompress = false; - if (C.getInputArgs().getLastArg(options::OPT_fsycl_compress_dev_imgs)) { + if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { isImgCompress = true; WrapperArgs.push_back( - C.getArgs().MakeArgString(Twine("-sycl-compress-dev-imgs"))); + C.getArgs().MakeArgString(Twine("-offload-compress"))); } - // -fsycl-compress-level=<> + // -offload-compression-level=<> if (Arg *A = C.getInputArgs().getLastArg( - options::OPT_fsycl_compress_level_EQ)) { + options::OPT_offload_compression_level_EQ)) { if (!isImgCompress) - C.getDriver().Diag(diag::warn_sycl_compress_opt_ignored) + C.getDriver().Diag(diag::warn_compress_opt_ignored) << A->getAsString(C.getInputArgs()); else WrapperArgs.push_back(C.getArgs().MakeArgString( - Twine("-sycl-compress-level=") + A->getValue())); - } - - // -fsycl-compress-threshold=<> - if (Arg *A = C.getInputArgs().getLastArg( - options::OPT_fsycl_compress_threshold_EQ)) { - - if (!isImgCompress) - C.getDriver().Diag(diag::warn_sycl_compress_opt_ignored) - << A->getAsString(C.getInputArgs()); - else - WrapperArgs.push_back(C.getArgs().MakeArgString( - Twine("-sycl-compress-threshold=") + 
A->getValue())); + Twine("-offload-compression-level=") + A->getValue())); } } diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt index 9e4fd7a5b4bfe..9dc1f244f2802 100644 --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -34,8 +34,6 @@ add_clang_tool(clang-linker-wrapper ${tablegen_deps} ) -add_dependencies(clang-linker-wrapper sycl-compress) - set(CLANG_LINKER_WRAPPER_LIB_DEPS clangBasic ) @@ -43,7 +41,6 @@ set(CLANG_LINKER_WRAPPER_LIB_DEPS target_link_libraries(clang-linker-wrapper PRIVATE ${CLANG_LINKER_WRAPPER_LIB_DEPS} - sycl-compress ) export_executable_symbols_for_plugins(clang-linker-wrapper) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 0a4dde759375c..e7294a314bc8a 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -67,8 +67,6 @@ #include #include -#include - #define COMPILE_OPTS "compile-opts" #define LINK_OPTS "link-opts" diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index 1e3bb9ef0ca08..3195f18fe23cd 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -10,13 +10,12 @@ add_clang_tool(clang-offload-wrapper set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS clangBasic + LLVMSupport ) -add_dependencies(clang-offload-wrapper sycl-compress) add_dependencies(clang clang-offload-wrapper) clang_target_link_libraries(clang-offload-wrapper PRIVATE ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} - sycl-compress ) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index d696550ac2e5d..73c788ebff76d 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ 
b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -67,8 +67,8 @@ #include #include +// For device image compression. #include -#include #define OPENMP_OFFLOAD_IMAGE_VERSION "1.0" @@ -135,7 +135,7 @@ static cl::opt cl::value_desc("filename"), cl::cat(ClangOffloadWrapperCategory)); -static cl::opt Verbose("v", cl::desc("verbose output"), +static cl::opt Verbose("v", cl::init(true), cl::desc("verbose output"), cl::cat(ClangOffloadWrapperCategory)); static cl::list Inputs(cl::Positional, cl::OneOrMore, @@ -144,21 +144,16 @@ static cl::list Inputs(cl::Positional, cl::OneOrMore, // CLI options for device image compression. // TODO: Turn off this option by default. -static cl::opt - SYCLCompressDevImg("sycl-compress-dev-imgs", cl::init(true), cl::Optional, - cl::desc("Enable device image compression using ZSTD."), - cl::cat(ClangOffloadWrapperCategory)); - -static cl::opt - SYCLCompressLevel("sycl-compress-level", cl::init(10), cl::Optional, - cl::desc("ZSTD Compression level. Default: 10"), - cl::cat(ClangOffloadWrapperCategory)); +static cl::opt OffloadCompressDevImgs( + "offload-compress", cl::init(true), cl::Optional, + cl::desc("Enable device image compression using ZSTD."), + cl::cat(ClangOffloadWrapperCategory)); static cl::opt - SYCLCompressThreshold("sycl-compress-threshold", cl::init(1024), - cl::Optional, - cl::desc("ZSTD Compression threshold. Default: 1024"), - cl::cat(ClangOffloadWrapperCategory)); + OffloadCompressLevel("offload-compression-level", cl::init(10), + cl::Optional, + cl::desc("ZSTD Compression level. Default: 10"), + cl::cat(ClangOffloadWrapperCategory)); // Binary image formats supported by this tool. 
The support basically means // mapping string representation given at the command line to a value from this @@ -167,12 +162,9 @@ enum BinaryImageFormat { none, // image kind is not determined native, // image kind is native // portable image kinds go next - spirv, // SPIR-V - llvmbc, // LLVM bitcode - compressed_none, // compressed image with unknown format - compressed_native, // compressed native format - compressed_spirv, // compressed SPIR-V - compressed_llvmbc // compressed LLVM bitcode + spirv, // SPIR-V + llvmbc, // LLVM bitcode + compressed_none // compressed image with unknown format }; /// Sets offload kind. @@ -292,12 +284,6 @@ static StringRef formatToString(BinaryImageFormat Fmt) { return "native"; case BinaryImageFormat::compressed_none: return "compressed_none"; - case BinaryImageFormat::compressed_native: - return "compressed_native"; - case BinaryImageFormat::compressed_spirv: - return "compressed_spirv"; - case BinaryImageFormat::compressed_llvmbc: - return "compressed_llvmbc"; } llvm_unreachable("bad format"); @@ -1118,69 +1104,40 @@ class BinaryWrapper { } else { // Don't compress if the user explicitly specifies the binary image - // format or if the image is smaller than the threshold. - if (Kind != OffloadKind::SYCL || !SYCLCompressDevImg || + // format or if the image is smaller than 512 bytes. 
+ if (Kind != OffloadKind::SYCL || !OffloadCompressDevImgs || Img.Fmt != BinaryImageFormat::none || - static_cast(Bin->getBufferSize()) < SYCLCompressThreshold) { + !llvm::compression::zstd::isAvailable() || + static_cast(Bin->getBufferSize()) < 512) { Fbin = addDeviceImageToModule( ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, Img.Tgt); } else { + SmallVector CompressedBuffer; + + llvm::compression::zstd::compress( + ArrayRef( + (const unsigned char *)(Bin->getBufferStart()), + Bin->getBufferSize()), + CompressedBuffer, OffloadCompressLevel); + if (Verbose) - errs() << " Compressing device image\n"; - - size_t dstSize; - auto CompressedBuffer = - std::move(sycl_compress::ZSTDCompressor::CompressBlob( - Bin->getBufferStart(), Bin->getBufferSize(), dstSize, - SYCLCompressLevel)); - - if (sycl_compress::ZSTDCompressor::GetLastError()) { - if (Verbose) { - errs() << " Compression failed with error:" - << (char *)CompressedBuffer.get() << "\n"; - errs() << " Falling back to uncompressed image\n"; - } - - Fbin = addDeviceImageToModule( - ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), - Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, - Img.Tgt); - } else { - if (Verbose) - errs() << " Compression succeeded. Original image size:" - << Bin->getBufferSize() - << " Compressed image size:" << dstSize << "\n"; - - Fbin = addDeviceImageToModule( - ArrayRef((const char *)CompressedBuffer.get(), dstSize), - Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, - Img.Tgt); - - // Change SPRIV format -> compressed SPIRV format. 
- BinaryImageFormat CompressedImgFmt; - switch (Img.Fmt) { - case BinaryImageFormat::none: - CompressedImgFmt = BinaryImageFormat::compressed_none; - break; - case BinaryImageFormat::native: - CompressedImgFmt = BinaryImageFormat::compressed_native; - break; - case BinaryImageFormat::spirv: - CompressedImgFmt = BinaryImageFormat::compressed_spirv; - break; - case BinaryImageFormat::llvmbc: - CompressedImgFmt = BinaryImageFormat::compressed_llvmbc; - break; - default: - return createStringError(errc::invalid_argument, - "unsupported image format"); - } - - Ffmt = ConstantInt::get(Type::getInt8Ty(C), CompressedImgFmt); - } + errs() << " Compression succeeded. Original image size:" + << Bin->getBufferSize() + << " Compressed image size:" << CompressedBuffer.size() + << "\n"; + + Fbin = addDeviceImageToModule( + ArrayRef((const char *)CompressedBuffer.data(), + CompressedBuffer.size()), + Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, + Img.Tgt); + + // Change image format to compressed_non. 
+ Ffmt = ConstantInt::get(Type::getInt8Ty(C), + BinaryImageFormat::compressed_none); } } From fae79066380c68c4c24fcd510f54db2b441905fd Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 31 Aug 2024 16:56:20 -0700 Subject: [PATCH 21/50] Use LLVMSupport in SYCL RT to decompress; Remove sycl-compress library :( --- buildbot/configure.py | 5 +- .../ClangOffloadWrapper.cpp | 2 +- llvm/include/llvm/Support/Compression.h | 2 + llvm/lib/Support/Compression.cpp | 4 + sycl-compress/CMakeLists.txt | 86 ------ .../include/sycl-compress/sycl-compress.h | 57 ---- sycl-compress/src/sycl-compress.cpp | 113 -------- .../unit-test/functional/CMakeLists.txt | 30 -- .../unit-test/functional/api-test.cpp | 88 ------ .../perf/performance-measurement.cpp | 268 ------------------ sycl/CMakeLists.txt | 2 - sycl/source/CMakeLists.txt | 10 +- sycl/source/detail/compiler.hpp | 5 +- sycl/source/detail/device_binary_image.cpp | 57 ++-- sycl/source/detail/device_binary_image.hpp | 5 +- .../program_manager/program_manager.cpp | 15 +- sycl/test/lit.cfg.py | 2 +- 17 files changed, 46 insertions(+), 705 deletions(-) delete mode 100644 sycl-compress/CMakeLists.txt delete mode 100644 sycl-compress/include/sycl-compress/sycl-compress.h delete mode 100644 sycl-compress/src/sycl-compress.cpp delete mode 100644 sycl-compress/unit-test/functional/CMakeLists.txt delete mode 100644 sycl-compress/unit-test/functional/api-test.cpp delete mode 100644 sycl-compress/unit-test/perf/performance-measurement.cpp diff --git a/buildbot/configure.py b/buildbot/configure.py index aa6f4e24a4385..3f55da1769965 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -21,7 +21,7 @@ def do_configure(args): if not os.path.isdir(abs_obj_dir): os.makedirs(abs_obj_dir) - llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw;sycl-compress" + llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw" # libdevice build requires a working SYCL toolchain, which is not the case # with macOS target right now. 
@@ -44,7 +44,6 @@ def do_configure(args): spirv_dir = os.path.join(abs_src_dir, "llvm-spirv") xpti_dir = os.path.join(abs_src_dir, "xpti") xptifw_dir = os.path.join(abs_src_dir, "xptifw") - sycl_compress_dir = os.path.join(abs_src_dir, "sycl-compress") libdevice_dir = os.path.join(abs_src_dir, "libdevice") jit_dir = os.path.join(abs_src_dir, "sycl-jit") llvm_targets_to_build = args.host_target @@ -174,12 +173,12 @@ def do_configure(args): "-DLLVM_EXTERNAL_XPTI_SOURCE_DIR={}".format(xpti_dir), "-DXPTI_SOURCE_DIR={}".format(xpti_dir), "-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR={}".format(xptifw_dir), - "-DLLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR={}".format(sycl_compress_dir), "-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR={}".format(libdevice_dir), "-DLLVM_EXTERNAL_SYCL_JIT_SOURCE_DIR={}".format(jit_dir), "-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects), "-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform), "-DLLVM_BUILD_TOOLS=ON", + "-DLLVM_ENABLE_ZSTD=ON", "-DSYCL_ENABLE_WERROR={}".format(sycl_werror), "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. 
diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 73c788ebff76d..eaa6f6d785ecb 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -135,7 +135,7 @@ static cl::opt cl::value_desc("filename"), cl::cat(ClangOffloadWrapperCategory)); -static cl::opt Verbose("v", cl::init(true), cl::desc("verbose output"), +static cl::opt Verbose("v", cl::desc("verbose output"), cl::cat(ClangOffloadWrapperCategory)); static cl::list Inputs(cl::Positional, cl::OneOrMore, diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h index 2a8da9e96d356..47614ae66385c 100644 --- a/llvm/include/llvm/Support/Compression.h +++ b/llvm/include/llvm/Support/Compression.h @@ -71,6 +71,8 @@ Error decompress(ArrayRef Input, uint8_t *Output, Error decompress(ArrayRef Input, SmallVectorImpl &Output, size_t UncompressedSize); +// Get the size of the decompressed data. 
+uint32_t getDecompressedSize(ArrayRef Input); } // End of namespace zstd enum class Format { diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index badaf68ab59cd..ddbf1266369fb 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -224,6 +224,10 @@ Error zstd::decompress(ArrayRef Input, return E; } +uint32_t zstd::getDecompressedSize(ArrayRef Input) { + return ZSTD_getFrameContentSize(Input.data(), Input.size()); +} + #else bool zstd::isAvailable() { return false; } void zstd::compress(ArrayRef Input, diff --git a/sycl-compress/CMakeLists.txt b/sycl-compress/CMakeLists.txt deleted file mode 100644 index 4fda495d49669..0000000000000 --- a/sycl-compress/CMakeLists.txt +++ /dev/null @@ -1,86 +0,0 @@ -cmake_minimum_required(VERSION 3.20.0) - -set(SYCL_COMPRESS_VERSION 0.0.7) -project (sycl-compress VERSION "${SYCL_COMPRESS_VERSION}" LANGUAGES CXX) - -# Setting the same version as SYCL -set(CMAKE_CXX_STANDARD 17) - -set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE) - -set(SYCL_COMPRESS_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}) -set(CMAKE_BINARY_DIR ${CMAKE_SOURCE_DIR}/lib/${CMAKE_BUILD_TYPE}) -set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}) - -# Download and build zstd statically. 
-set(ZSTD_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/zstd/install) - -if (WIN32) - set(ZSTD_LIBRARY_NAME zstd_static.lib) -else() - set(ZSTD_LIBRARY_NAME libzstd.a) -endif() - -include(ExternalProject) -ExternalProject_Add(zstd - DEPENDS - URL https://github.com/facebook/zstd/releases/download/v1.5.6/zstd-1.5.6.tar.gz - URL_HASH SHA256=8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 - SOURCE_SUBDIR build/cmake - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZSTD_INSTALL_DIR} -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DZSTD_BUILD_PROGRAMS=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -DZSTD_MULTITHREAD_SUPPORT=OFF - BUILD_BYPRODUCTS "${ZSTD_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${ZSTD_LIBRARY_NAME}" - DOWNLOAD_EXTRACT_TIMESTAMP ON - ) - -set(ZSTD_LIBRARY ${ZSTD_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${ZSTD_LIBRARY_NAME}) -set(ZSTD_INCLUDE_DIR ${ZSTD_INSTALL_DIR}/include) - -# Setup sycl-compress library. Add dependency on zstd. -set(SOURCES - src/sycl-compress.cpp - include/sycl-compress/sycl-compress.h -) - -add_library(sycl-compress SHARED ${SOURCES}) - -add_dependencies(sycl-compress zstd) -target_compile_definitions(sycl-compress PRIVATE SYCL_COMPRESS_BUILD) -target_include_directories(sycl-compress PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include ${ZSTD_INCLUDE_DIR}) -target_include_directories(sycl-compress PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) -target_link_libraries(sycl-compress PRIVATE ${ZSTD_LIBRARY}) - -# Set the location of the library installation -include(GNUInstallDirs) -install(TARGETS sycl-compress - LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress - ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT sycl-compress -) - -if (LLVM_BINARY_DIR) - file(GLOB_RECURSE SYCL_COMPRESS_HEADERS_LIST CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress/*") - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}" "${LLVM_BINARY_DIR}" - SYCL_COMPRESS_HEADERS_OUT_LIST "${SYCL_COMPRESS_HEADERS_LIST}") - 
add_custom_target(sycl-compress-headers - DEPENDS ${SYCL_COMPRESS_HEADERS_OUT_LIST}) - - add_custom_command( - OUTPUT ${SYCL_COMPRESS_HEADERS_OUT_LIST} - DEPENDS ${SYCL_COMPRESS_HEADERS_LIST} - COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress - ${LLVM_BINARY_DIR}/include/sycl-compress - COMMENT "Copying sycl-compress headers ..." - ) - add_dependencies(sycl-compress sycl-compress-headers zstd) -endif() - -include(GNUInstallDirs) -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/sycl-compress - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - COMPONENT sycl-compress -) - -# sycl-compress tests can be run using the check-sycl-compress target -add_custom_target(check-sycl-compress) -# Add tests -add_subdirectory(unit-test/functional) diff --git a/sycl-compress/include/sycl-compress/sycl-compress.h b/sycl-compress/include/sycl-compress/sycl-compress.h deleted file mode 100644 index 0f8cf543a8771..0000000000000 --- a/sycl-compress/include/sycl-compress/sycl-compress.h +++ /dev/null @@ -1,57 +0,0 @@ -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -#pragma once - -#include - -namespace sycl_compress { - -// Singleton class to handle ZSTD compression and decompression. -class -#ifdef _WIN32 -#ifdef SYCL_COMPRESS_BUILD - __declspec(dllexport) // When building sycl-compress -#else - __declspec(dllimport) // When using sycl-compress headers in dependencies. -#endif -#endif - ZSTDCompressor { -private: - ZSTDCompressor(); - ~ZSTDCompressor(); - - ZSTDCompressor(const ZSTDCompressor &) = delete; - ZSTDCompressor &operator=(const ZSTDCompressor &) = delete; - - // Get the singleton instance of the ZSTDCompressor class. 
- static ZSTDCompressor &GetSingletonInstance(); - - // Public APIs -public: - // Return 0 is last (de)compression was successful, otherwise return error - // code. - static int GetLastError(); - - // Returns a string representation of the error code. - // If the error code is 0, it returns "No error detected". - static std::string GetErrorString(int code); - - // Blob (de)compression do not assume format/structure of the input buffer. - static std::unique_ptr CompressBlob(const char *src, size_t srcSize, - size_t &dstSize, int level); - - static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, - size_t &dstSize); - - // Data fields -private: - int m_lastError; - // ZSTD context. Reusing ZSTD context speeds up subsequent (de)compression. - // Storing as void* to avoid including ZSTD headers in this file. - void *m_ZSTD_compression_ctx; - void *m_ZSTD_decompression_ctx; -}; -} // namespace sycl_compress diff --git a/sycl-compress/src/sycl-compress.cpp b/sycl-compress/src/sycl-compress.cpp deleted file mode 100644 index df2c834020775..0000000000000 --- a/sycl-compress/src/sycl-compress.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -#include -#include - -#include -#include - -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) - -namespace sycl_compress { - -// Singleton instance of the ZSTDCompressor class. -ZSTDCompressor &ZSTDCompressor::GetSingletonInstance() { - static ZSTDCompressor instance; - return instance; -} - -// Initialize ZSTD context and error code. 
-ZSTDCompressor::ZSTDCompressor() { - m_ZSTD_compression_ctx = static_cast(ZSTD_createCCtx()); - m_ZSTD_decompression_ctx = static_cast(ZSTD_createDCtx()); - - if (!m_ZSTD_compression_ctx || !m_ZSTD_decompression_ctx) { - std::cerr << "Error creating ZSTD contexts. \n"; - } - - m_lastError = 0; -} - -// Free ZSTD contexts. -ZSTDCompressor::~ZSTDCompressor() { - ZSTD_freeCCtx(static_cast(m_ZSTD_compression_ctx)); - ZSTD_freeDCtx(static_cast(m_ZSTD_decompression_ctx)); -} - -std::unique_ptr ZSTDCompressor::CompressBlob(const char *src, - size_t srcSize, - size_t &dstSize, int level) { - - auto &instance = GetSingletonInstance(); - - // Get maximum size of the compressed buffer and allocate it. - auto dstBufferSize = ZSTD_compressBound(srcSize); - auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); - - // Compress the input buffer. - dstSize = ZSTD_compressCCtx( - static_cast(instance.m_ZSTD_compression_ctx), - static_cast(dstBuffer.get()), dstBufferSize, - static_cast(src), srcSize, level); - - // Store the error code if compression failed. - if (ZSTD_isError(dstSize)) - instance.m_lastError = dstSize; - else - instance.m_lastError = 0; - - // Pass ownership of the buffer to the caller. - return std::move(dstBuffer); -} - -std::unique_ptr ZSTDCompressor::DecompressBlob(const char *src, - size_t srcSize, - size_t &dstSize) { - - auto &instance = GetSingletonInstance(); - - // Size of decompressed image can be larger than what we can allocate - // on heap. In that case, we need to use streaming decompression. - // TODO: Throw if the decompression size is too large. - auto dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); - - if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || - dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { - - std::cerr << "Error determining size of uncompressed data\n"; - dstSize = 0; - instance.m_lastError = dstBufferSize; - return nullptr; - } - - // Allocate buffer for decompressed data. 
- auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); - - dstSize = ZSTD_decompressDCtx( - static_cast(instance.m_ZSTD_decompression_ctx), - static_cast(dstBuffer.get()), dstBufferSize, - static_cast(src), srcSize); - - // In case of decompression error, return the error message and set dstSize to - // 0. - if (ZSTD_isError(dstSize)) { - instance.m_lastError = dstSize; - dstSize = 0; - } - - // Pass ownership of the buffer to the caller. - return std::move(dstBuffer); -} - -int ZSTDCompressor::GetLastError() { - return GetSingletonInstance().m_lastError; -} - -std::string ZSTDCompressor::GetErrorString(int code) { - return ZSTD_getErrorName(code); -} -} // namespace sycl_compress diff --git a/sycl-compress/unit-test/functional/CMakeLists.txt b/sycl-compress/unit-test/functional/CMakeLists.txt deleted file mode 100644 index 3fd60be6d91f2..0000000000000 --- a/sycl-compress/unit-test/functional/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# if (NOT EXISTS ${XPTI_DIR}) -# message (FATAL_ERROR "Undefined XPTI_DIR variable: Must be set for tests to work!") -# endif() -# include_directories(${XPTI_DIR}/include) - -add_executable(SYCLCompressUnitTests EXCLUDE_FROM_ALL - api-test.cpp -) - -# Silence warnings from GTest -target_include_directories(SYCLCompressUnitTests SYSTEM PRIVATE - ${gtest_SOURCE_DIR}/include - ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include -) - -add_dependencies(SYCLCompressUnitTests sycl-compress) - -target_link_libraries(SYCLCompressUnitTests PRIVATE - llvm_gtest - llvm_gtest_main - sycl-compress - LLVMSupport -) - -add_test(NAME SYCLCompressTests COMMAND SYCLCompressUnitTests) - -add_custom_target(check-sycl-compress-unittest COMMAND - $/SYCLCompressUnitTests) -add_dependencies(check-sycl-compress-unittest SYCLCompressUnitTests) -add_dependencies(check-sycl-compress check-sycl-compress-unittest) diff --git a/sycl-compress/unit-test/functional/api-test.cpp b/sycl-compress/unit-test/functional/api-test.cpp deleted file mode 100644 
index f6c81009b2adf..0000000000000 --- a/sycl-compress/unit-test/functional/api-test.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -#include "sycl-compress/sycl-compress.h" - -#include -#include - -using namespace sycl_compress; -class syclCompressCorrectnessTest : public ::testing::Test {}; - -// Simple (de)compression of a string -TEST_F(syclCompressCorrectnessTest, CompressionTest) { - - std::string input = "Hello, World!"; - size_t compressedSize = 0; - auto compressedData = ZSTDCompressor::CompressBlob( - input.c_str(), input.size(), compressedSize, 1); - - ASSERT_NE(compressedData, nullptr); - ASSERT_GT(compressedSize, 0); - - size_t decompressedSize = 0; - auto decompressedData = ZSTDCompressor::DecompressBlob( - compressedData.get(), compressedSize, decompressedSize); - - ASSERT_NE(decompressedData, nullptr); - ASSERT_GT(decompressedSize, 0); - - std::string decompressedStr(decompressedData.get(), decompressedSize); - ASSERT_EQ(input, decompressedStr); -} - -// Test getting error code and error string. -// Intentionally give incorrect input to decompress -// to trigger an error. -TEST_F(syclCompressCorrectnessTest, NegativeErrorTest) { - std::string input = "Hello, World!"; - size_t decompressedSize = 0; - auto compressedData = ZSTDCompressor::DecompressBlob( - input.c_str(), input.size(), decompressedSize); - - int errorCode = ZSTDCompressor::GetLastError(); - ASSERT_NE(errorCode, 0); - - std::string errorString = ZSTDCompressor::GetErrorString(errorCode); - ASSERT_NE(errorString, "No error detected"); -} - -// Test that the error code is 0 after a successful (de)compression. 
-TEST_F(syclCompressCorrectnessTest, PositiveErrorTest) { - std::string input = "Hello, World!"; - [[maybe_unused]] size_t compressedSize = 0; - [[maybe_unused]] auto compressedData = ZSTDCompressor::CompressBlob( - input.c_str(), input.size(), compressedSize, 1); - - int errorCode = ZSTDCompressor::GetLastError(); - ASSERT_EQ(errorCode, 0); - - std::string errorString = ZSTDCompressor::GetErrorString(errorCode); - ASSERT_EQ(errorString, "No error detected"); -} - -// Test passing empty input to (de)compress. -// There should be no error and the output should be empty. -TEST_F(syclCompressCorrectnessTest, EmptyInputTest) { - std::string input = ""; - size_t compressedSize = 0; - auto compressedData = ZSTDCompressor::CompressBlob( - input.c_str(), input.size(), compressedSize, 1); - - ASSERT_NE(compressedData, nullptr); - ASSERT_GT(compressedSize, 0); - ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); - - size_t decompressedSize = 0; - auto decompressedData = ZSTDCompressor::DecompressBlob( - compressedData.get(), compressedSize, decompressedSize); - - ASSERT_NE(decompressedData, nullptr); - ASSERT_EQ(decompressedSize, 0); - ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); - - std::string decompressedStr(decompressedData.get(), decompressedSize); - ASSERT_EQ(input, decompressedStr); -} diff --git a/sycl-compress/unit-test/perf/performance-measurement.cpp b/sycl-compress/unit-test/perf/performance-measurement.cpp deleted file mode 100644 index 1ca740ec1de57..0000000000000 --- a/sycl-compress/unit-test/perf/performance-measurement.cpp +++ /dev/null @@ -1,268 +0,0 @@ -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// - -// Test to measure performance of compress/decompression using sycl-compress -// library. This is not run by default in the test suite. 
- -// Takes input the dataset of SPIRV files and (de)compresses them using ZSTD. -// Stores the compression, decompression time in a CSV file. -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NUM_WORKLOADS 1 -#define MAX_WORKLOAD_SIZE 1024 * 1024 * 100 // 100 MB -#define ZSTD_MIN_COMPRESSION_LEVEL 1 -#define ZSTD_MAX_COMPRESSION_LEVEL 22 - -// Generate a random buffer of data with size in the range [1, -// MAX_WORKLOAD_SIZE] Return the buffer and its size (in workloadSize) -const char *GenerateRandonWorkload(size_t &workloadSize) { - - // Get randon size in the range [1, MAX_WORKLOAD_SIZE] - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(1, MAX_WORKLOAD_SIZE); - workloadSize = static_cast(dis(gen)); - - // Allocate heap buffer. - char *wokload = static_cast(malloc(workloadSize)); - - // Populate buffer with random data. - std::generate(wokload, wokload + workloadSize, - [&]() { return static_cast(dis(gen)); }); - - return wokload; -} - -// Compress workload using ZSTD and the supplied compression level. -// Returns the time taken to compress the workload and the compressed size. -std::chrono::nanoseconds CompressWorkload(const char *workload, - size_t workloadSize, int level, - char *&compressedData, - size_t &compressedSize) { - auto start = std::chrono::high_resolution_clock::now(); - char *compressed = - compressBlob(workload, workloadSize, compressedSize, level); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds duration = - std::chrono::duration_cast(end - start); - compressedData = compressed; - return duration; -} - -// Decompress workload using ZSTD. -// Returns the time taken to decompress the workload. 
-std::chrono::nanoseconds DecompressWorkload(const char *compressedData, - size_t compressedSize, - char *&decompressedData, - size_t &decompressedSize) { - auto start = std::chrono::high_resolution_clock::now(); - char *decompressed = - decompressBlob(compressedData, compressedSize, decompressedSize); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds duration = - std::chrono::duration_cast(end - start); - decompressedData = decompressed; - return duration; -} - -// Measure the performance of compressing and decompressing random workloads -// NUM_WORKLOADS times at a give compression level. -// Returns a vector of vectors where each inner vector contains the time taken -// to compress and decompress a workload, workload size, and the compressed -// size. -std::vector> MeasurePerformance(int level) { - std::vector> results; - for (int i = 0; i < NUM_WORKLOADS; i++) { - - // Generate random workload. - size_t workloadSize; - const char *workload = GenerateRandonWorkload(workloadSize); - - // Compress. - size_t compressedSize; - char *compressedData; - auto compressDuration = CompressWorkload(workload, workloadSize, level, - compressedData, compressedSize); - - // Decompress. - size_t decompressedSize; - char *decompressedData; - auto decompressDuration = DecompressWorkload( - compressedData, compressedSize, decompressedData, decompressedSize); - - // Check the size of the decompressed data is same as the original workload. 
- if (workloadSize != decompressedSize) { - std::cerr - << "Error: Decompressed size is not same as original workload size\n"; - std::cerr << "Workload size: " << workloadSize - << " Decompressed size: " << decompressedSize << "\n"; - exit(1); - } - - // Save results - results.push_back({static_cast(compressDuration.count()), - static_cast(decompressDuration.count()), - static_cast(workloadSize), - static_cast(compressedSize)}); - - free(const_cast(workload)); - free(compressedData); - free(decompressedData); - } - return results; -} - -// Run workloads for different compression levels between -// ZSTD_MIN_COMPRESSION_LEVEL and ZSTD_MAX_COMPRESSION_LEVEL. Saves the result -// in a CSV file, with a user-supplied name, with the following columns: -// Compression level | Workload size | Compressed size | Compress duration | -// Decompress duration. -void RunWorkloads(const std::string &outputFile) { - - // Write results to a CSV file. - // Clear the file if it already exists. - std::ofstream file; - file.open(outputFile, std::ofstream::out | std::ofstream::trunc); - - // Write header. - file << "Compression level,Workload size,Compressed size,Compress duration," - "Decompress duration\n"; - - try { - for (int level = ZSTD_MIN_COMPRESSION_LEVEL; - level <= ZSTD_MAX_COMPRESSION_LEVEL; level++) { - - std::cout << "Running workloads for compression level: " << level << "\n"; - auto levelResults = MeasurePerformance(level); - for (const auto &result : levelResults) { - file << level << "," << result[2] << "," << result[3] << "," - << result[0] << "," << result[1] << "\n"; - } - file.flush(); - } - } catch (const std::exception &e) { - std::cerr << "Error: " << e.what() << "\n"; - } - - file.close(); -} - -// Takes a full file path as input, loads it into a buffer and (de)compress it -// with different levels. Returns a vector of vectors containing the -// (de)compression time, workload size, and compressed size, for each level. 
-std::vector> -MeasurePerformanceOfFileCompression(const std::string &filePath) { - std::vector> results; - std::ifstream file(filePath, std::ios::binary | std::ios::ate); - if (!file.is_open()) { - std::cerr << "Error: Could not open file: " << filePath << "\n"; - exit(1); - } - - // Get file size. - size_t fileSize = file.tellg(); - file.seekg(0, std::ios::beg); - - // Allocate buffer to hold file data. - char *fileData = static_cast(malloc(fileSize)); - file.read(fileData, fileSize); - file.close(); - - for (int level = ZSTD_MIN_COMPRESSION_LEVEL; - level <= ZSTD_MAX_COMPRESSION_LEVEL; level++) { - std::cout << "Running workloads for compression level: " << level << "\n"; - - // Compress. - size_t compressedSize; - char *compressedData; - auto compressDuration = CompressWorkload(fileData, fileSize, level, - compressedData, compressedSize); - - // Decompress. - size_t decompressedSize; - char *decompressedData; - auto decompressDuration = DecompressWorkload( - compressedData, compressedSize, decompressedData, decompressedSize); - - // Check the size of the decompressed data is same as the original workload. - if (fileSize != decompressedSize) { - std::cerr - << "Error: Decompressed size is not same as original workload size\n"; - std::cerr << "Workload size: " << fileSize - << " Decompressed size: " << decompressedSize << "\n"; - exit(1); - } - assert(level >= 0); - results.push_back({static_cast(level), fileSize, compressedSize, - static_cast(compressDuration.count()), - static_cast(decompressDuration.count())}); - free(compressedData); - free(decompressedData); - } - - free(fileData); - return results; -} - -// Given a directory and output file name, iterate over all files in the -// directory with extension .spv or .spirv and compress/decompress them with -// different levels. Save the results in a CSV file. 
-void RunWorkloadsForFiles(const std::string &directory, - const std::string &outputFile) { - - // Check validity of the input directory path and output file. - if (!std::filesystem::exists(directory)) { - std::cerr << "Error: Directory does not exist: " << directory << "\n"; - exit(1); - } - - // Write results to a CSV file. - // Clear the file if it already exists. - std::ofstream file; - file.open(outputFile, std::ofstream::out | std::ofstream::trunc); - - // Write header. - file << "FileName, Compression level,Workload size,Compressed size,Compress " - "duration," - "Decompress duration\n"; - - for (const auto &entry : std::filesystem::directory_iterator(directory)) { - std::string filePath = entry.path().string(); - if (filePath.find(".spv") != std::string::npos || - filePath.find(".spirv") != std::string::npos) { - std::cout << "Running workloads for file: " << filePath << "\n"; - auto results = MeasurePerformanceOfFileCompression(filePath); - - for (const auto &result : results) { - - file << filePath << "," << result[0] << "," << result[1] << "," - << result[2] << "," << result[3] << "," << result[4] << "\n"; - } - file.flush(); - } - } - - file.close(); -} - -int main(int argc, char *argv[]) { - if (argc != 3) { - std::cerr << "Usage: " << argv[0] << " \n"; - return 1; - } - - RunWorkloadsForFiles(argv[2], argv[1]); - return 0; -} diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 78d9ddea973b7..ee355ade71876 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -368,7 +368,6 @@ add_custom_target(sycl-compiler spirv-to-ir-wrapper sycl-post-link opencl-aot - sycl-compress ) add_custom_target( sycl-runtime-libraries @@ -458,7 +457,6 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS level-zero-sycl-dev ${XPTIFW_LIBS} ${SYCL_TOOLCHAIN_DEPS} - sycl-compress ) if (WIN32) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 1d530bdcb9c71..818601e5f8286 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ 
-69,13 +69,9 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() - # Add dependency on sycl-compress. - if (NOT DEFINED LLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR) - message (FATAL_ERROR "Undefined LLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR variable: Must be set when using sycl-compress") - endif() - include_directories(${LLVM_EXTERNAL_SYCL_COMPRESS_SOURCE_DIR}/include) - add_dependencies(${LIB_NAME} sycl-compress) - target_link_libraries(${LIB_NAME} PRIVATE sycl-compress) + # Need LLVMSUpport for device image compression. + add_dependencies(${LIB_NAME} LLVMSupport) + target_link_libraries(${LIB_NAME} PRIVATE LLVMSupport) target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 7b42fabc85bbd..827ee61ef8110 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -116,10 +116,7 @@ enum sycl_device_binary_type : uint8_t { SYCL_DEVICE_BINARY_TYPE_NATIVE = 1, // specific to a device SYCL_DEVICE_BINARY_TYPE_SPIRV = 2, SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3, - SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE = 4, - SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE = 5, - SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV = 6, - SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE = 7 + SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE = 4 }; // Device binary descriptor version supported by this library. diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 9ec8b231c8347..000a54310e942 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -9,7 +9,10 @@ #include #include -#include +// For device image compression. 
+#include +#include +#include #include #include @@ -234,43 +237,37 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) : RTDeviceBinaryImage() { - // Decompress the binary image. - size_t DecompressedSize = 0; size_t compressedDataSize = static_cast(CompressedBin->BinaryEnd - CompressedBin->BinaryStart); - m_DecompressedData = std::move(sycl_compress::ZSTDCompressor::DecompressBlob( - reinterpret_cast(CompressedBin->BinaryStart), - compressedDataSize, DecompressedSize)); - if (!DecompressedSize) { - std::cerr << "Failed to decompress device binary image\n"; - return; + // Get ArrayRef of compressed data. + llvm::ArrayRef CompressedData( + reinterpret_cast(CompressedBin->BinaryStart), + compressedDataSize); + + // Decompress the binary image. + size_t DecompressedSize = + llvm::compression::zstd::getDecompressedSize(CompressedData); + + m_DecompressedData = + std::unique_ptr(new unsigned char[DecompressedSize]); + + if (llvm::compression::zstd::isAvailable()) { + + auto Err = llvm::compression::zstd::decompress( + CompressedData, m_DecompressedData.get(), DecompressedSize); + + assert(!Err && "Failed to decompress ZSTD data"); + } else { + assert(false && "ZSTD not available"); } Bin = new sycl_device_binary_struct(*CompressedBin); - Bin->BinaryStart = - reinterpret_cast(m_DecompressedData.get()); + Bin->BinaryStart = m_DecompressedData.get(); Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; - // Get the new format. 
- auto currFormat = static_cast(Bin->Format); - switch (currFormat) { - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: - currFormat = SYCL_DEVICE_BINARY_TYPE_NONE; - break; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE: - currFormat = SYCL_DEVICE_BINARY_TYPE_NATIVE; - break; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV: - currFormat = SYCL_DEVICE_BINARY_TYPE_SPIRV; - break; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE: - currFormat = SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; - break; - default: - break; - } - Bin->Format = currFormat; + // Set the new format to none and let RT determine the format. + Bin->Format = SYCL_DEVICE_BINARY_TYPE_NONE; init(Bin); } diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 95b7ac78c04f0..2c250a2e43da1 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -16,6 +16,9 @@ #include +#include +#include + #include #include #include @@ -290,7 +293,7 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { } private: - std::unique_ptr m_DecompressedData; + std::unique_ptr m_DecompressedData; }; } // namespace detail diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 5641d4f443310..d49cee405f3dd 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -173,12 +173,6 @@ static bool isDeviceBinaryTypeSupported(const context &C, return "LLVM IR"; case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: return "compressed none"; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE: - return "compressed native"; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV: - return "compressed SPIR-V"; - case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE: - return "compressed LLVM IR"; } assert(false && "Unknown device image format"); return "unknown"; @@ -2795,14 +2789,7 @@ inline 
bool ProgramManager::isDeviceImageCompressed(sycl_device_binary Bin) const { auto currFormat = static_cast(Bin->Format); - - if (currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE || - currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NATIVE || - currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_SPIRV || - currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_LLVMIR_BITCODE) - return true; - else - return false; + return currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE; } bool doesDevSupportDeviceRequirements(const device &Dev, diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 9ecc82633feba..3205e54d236ba 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -144,7 +144,7 @@ additional_flags = config.sycl_clang_extra_flags.split(" ") # TODO: Remove this. -additional_flags.append("-fsycl-compress-dev-imgs") +additional_flags.append("--offload-compress") if config.cuda == "ON": config.available_features.add("cuda") From 9272d659061f1c9d084652b33b0454b7d4b3e3ea Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 31 Aug 2024 22:50:56 -0700 Subject: [PATCH 22/50] Fix build error when zstd is not present. 
--- llvm/lib/Support/Compression.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index ddbf1266369fb..bcf40acdb99f4 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -244,4 +244,7 @@ Error zstd::decompress(ArrayRef Input, size_t UncompressedSize) { llvm_unreachable("zstd::decompress is unavailable"); } +uint32_t zstd::getDecompressedSize(ArrayRef Input) { + llvm_unreachable("zstd::decompress is unavailable"); +} #endif From b4a2c0c32c8d92d0f37bf9be58739e74f163c91d Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 5 Sep 2024 18:56:52 -0700 Subject: [PATCH 23/50] link zstd with RT instead of LLVMSupport --- llvm/cmake/modules/Findzstd.cmake | 7 +- sycl/include/sycl/detail/compression.hpp | 134 +++++++++++++++++++++ sycl/source/CMakeLists.txt | 9 +- sycl/source/detail/device_binary_image.cpp | 32 ++--- sycl/source/detail/device_binary_image.hpp | 3 - 5 files changed, 153 insertions(+), 32 deletions(-) create mode 100644 sycl/include/sycl/detail/compression.hpp diff --git a/llvm/cmake/modules/Findzstd.cmake b/llvm/cmake/modules/Findzstd.cmake index 4bc0b793e51c9..9877b6ca63215 100644 --- a/llvm/cmake/modules/Findzstd.cmake +++ b/llvm/cmake/modules/Findzstd.cmake @@ -16,11 +16,12 @@ else() set(zstd_STATIC_LIBRARY_SUFFIX "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") endif() -find_path(zstd_INCLUDE_DIR NAMES zstd.h) -find_library(zstd_LIBRARY NAMES zstd zstd_static) +find_path(zstd_INCLUDE_DIR NAMES zstd.h HINTS $ENV{ZSTD_ROOT}/include) +find_library(zstd_LIBRARY NAMES zstd zstd_static HINTS $ENV{ZSTD_ROOT}/lib) find_library(zstd_STATIC_LIBRARY NAMES zstd_static - "${CMAKE_STATIC_LIBRARY_PREFIX}zstd${CMAKE_STATIC_LIBRARY_SUFFIX}") + "${CMAKE_STATIC_LIBRARY_PREFIX}zstd${CMAKE_STATIC_LIBRARY_SUFFIX}" + HINTS $ENV{ZSTD_ROOT}/lib) include(FindPackageHandleStandardArgs) find_package_handle_standard_args( diff --git a/sycl/include/sycl/detail/compression.hpp 
b/sycl/include/sycl/detail/compression.hpp new file mode 100644 index 0000000000000..9e8021178d123 --- /dev/null +++ b/sycl/include/sycl/detail/compression.hpp @@ -0,0 +1,134 @@ +//==---------- compression.hpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// Singleton class to handle ZSTD compression and decompression. +class ZSTDCompressor { +private: + // Initialize ZSTD context and error code. + ZSTDCompressor() { + m_ZSTD_compression_ctx = static_cast(ZSTD_createCCtx()); + m_ZSTD_decompression_ctx = static_cast(ZSTD_createDCtx()); + + if (!m_ZSTD_compression_ctx || !m_ZSTD_decompression_ctx) { + std::cerr << "Error creating ZSTD contexts. \n"; + } + + m_lastError = 0; + } + + // Free ZSTD contexts. + ~ZSTDCompressor() { + ZSTD_freeCCtx(static_cast(m_ZSTD_compression_ctx)); + ZSTD_freeDCtx(static_cast(m_ZSTD_decompression_ctx)); + } + + ZSTDCompressor(const ZSTDCompressor &) = delete; + ZSTDCompressor &operator=(const ZSTDCompressor &) = delete; + + // Get the singleton instance of the ZSTDCompressor class. + static ZSTDCompressor &GetSingletonInstance() { + static ZSTDCompressor instance; + return instance; + } + + // Public APIs +public: + // Return 0 is last (de)compression was successful, otherwise return error + // code. + static int GetLastError() { return GetSingletonInstance().m_lastError; } + + // Returns a string representation of the error code. + // If the error code is 0, it returns "No error detected". 
+ static std::string GetErrorString(int code) { + return ZSTD_getErrorName(code); + } + + // Blob (de)compression do not assume format/structure of the input buffer. + static std::unique_ptr CompressBlob(const char *src, size_t srcSize, + size_t &dstSize, int level) { + auto &instance = GetSingletonInstance(); + + // Get maximum size of the compressed buffer and allocate it. + auto dstBufferSize = ZSTD_compressBound(srcSize); + auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); + + // Compress the input buffer. + dstSize = ZSTD_compressCCtx( + static_cast(instance.m_ZSTD_compression_ctx), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize, level); + + // Store the error code if compression failed. + if (ZSTD_isError(dstSize)) + instance.m_lastError = dstSize; + else + instance.m_lastError = 0; + + // Pass ownership of the buffer to the caller. + return std::move(dstBuffer); + } + + static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, + size_t &dstSize) { + auto &instance = GetSingletonInstance(); + + // Size of decompressed image can be larger than what we can allocate + // on heap. In that case, we need to use streaming decompression. + // TODO: Throw if the decompression size is too large. + auto dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); + + if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || + dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { + + std::cerr << "Error determining size of uncompressed data\n"; + dstSize = 0; + instance.m_lastError = dstBufferSize; + return nullptr; + } + + // Allocate buffer for decompressed data. + auto dstBuffer = std::unique_ptr(new unsigned char[dstBufferSize]); + + dstSize = ZSTD_decompressDCtx( + static_cast(instance.m_ZSTD_decompression_ctx), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize); + + // In case of decompression error, return the error message and set dstSize + // to 0. 
+ if (ZSTD_isError(dstSize)) { + instance.m_lastError = dstSize; + dstSize = 0; + } + + // Pass ownership of the buffer to the caller. + return std::move(dstBuffer); + } + + // Data fields +private: + int m_lastError; + // ZSTD context. Reusing ZSTD context speeds up subsequent (de)compression. + // Storing as void* to avoid including ZSTD headers in this file. + void *m_ZSTD_compression_ctx; + void *m_ZSTD_decompression_ctx; +}; +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 818601e5f8286..8699fda09c2e2 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -69,10 +69,11 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() - # Need LLVMSUpport for device image compression. - add_dependencies(${LIB_NAME} LLVMSupport) - target_link_libraries(${LIB_NAME} PRIVATE LLVMSupport) - + # Need zstd for device image compression. + find_package(zstd REQUIRED) + target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) + target_include_directories(${LIB_OBJ_NAME} PRIVATE ${zstd_INCLUDE_DIR}) + target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) # ur_win_proxy_loader diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 000a54310e942..fd83f51400ede 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -10,9 +10,7 @@ #include // For device image compression. -#include -#include -#include +#include #include #include @@ -240,26 +238,16 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( size_t compressedDataSize = static_cast(CompressedBin->BinaryEnd - CompressedBin->BinaryStart); - // Get ArrayRef of compressed data. 
- llvm::ArrayRef CompressedData( - reinterpret_cast(CompressedBin->BinaryStart), - compressedDataSize); + size_t DecompressedSize = 0; + m_DecompressedData = std::move(ZSTDCompressor::DecompressBlob( + reinterpret_cast(CompressedBin->BinaryStart), + compressedDataSize, DecompressedSize)); - // Decompress the binary image. - size_t DecompressedSize = - llvm::compression::zstd::getDecompressedSize(CompressedData); - - m_DecompressedData = - std::unique_ptr(new unsigned char[DecompressedSize]); - - if (llvm::compression::zstd::isAvailable()) { - - auto Err = llvm::compression::zstd::decompress( - CompressedData, m_DecompressedData.get(), DecompressedSize); - - assert(!Err && "Failed to decompress ZSTD data"); - } else { - assert(false && "ZSTD not available"); + if (!m_DecompressedData) { + throw sycl::exception( + sycl::make_error_code(sycl::errc::runtime), + "Failed to decompress device binary image. " + + ZSTDCompressor::GetErrorString(ZSTDCompressor::GetLastError())); } Bin = new sycl_device_binary_struct(*CompressedBin); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 2c250a2e43da1..acf3265b0099b 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -16,9 +16,6 @@ #include -#include -#include - #include #include #include From 63389bb32df855929180a39064547b326eb62c7c Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 6 Sep 2024 08:46:45 -0700 Subject: [PATCH 24/50] Fix formatting; Add CMAKE_FIND_DEBUG_MODE to debug why zstd is not found on Win CI machines --- sycl/include/sycl/detail/compression.hpp | 7 ++++--- sycl/source/CMakeLists.txt | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/detail/compression.hpp b/sycl/include/sycl/detail/compression.hpp index 9e8021178d123..fba53adec075b 100644 --- a/sycl/include/sycl/detail/compression.hpp +++ b/sycl/include/sycl/detail/compression.hpp @@ -84,8 +84,8 @@ class 
ZSTDCompressor { return std::move(dstBuffer); } - static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, - size_t &dstSize) { + static std::unique_ptr + DecompressBlob(const char *src, size_t srcSize, size_t &dstSize) { auto &instance = GetSingletonInstance(); // Size of decompressed image can be larger than what we can allocate @@ -103,7 +103,8 @@ class ZSTDCompressor { } // Allocate buffer for decompressed data. - auto dstBuffer = std::unique_ptr(new unsigned char[dstBufferSize]); + auto dstBuffer = + std::unique_ptr(new unsigned char[dstBufferSize]); dstSize = ZSTD_decompressDCtx( static_cast(instance.m_ZSTD_decompression_ctx), diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index e10f00bea5628..f16d6899de285 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -69,6 +69,10 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() + # TODO: Remove debug print. Need this to figure out why zstd is not + # being found on Windows CI machines. + set(CMAKE_FIND_DEBUG_MODE 1) + # Need zstd for device image compression. find_package(zstd REQUIRED) target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) From 64878676eaff6c6c62a086b1aa41b0e2752a4b91 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 7 Sep 2024 15:39:52 -0700 Subject: [PATCH 25/50] Add LIT test for driver changes. 
--- .../clang/Basic/DiagnosticDriverKinds.td | 2 +- .../test/Driver/clang-offload-wrapper-zstd.c | 40 +++++++++++++++++++ .../ClangOffloadWrapper.cpp | 38 +++++++++++++----- 3 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 clang/test/Driver/clang-offload-wrapper-zstd.c diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 22267c6a523a4..0276ece097945 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -901,6 +901,6 @@ def warn_missing_include_dirs : Warning< "no such include directory: '%0'">, InGroup, DefaultIgnore; def warn_compress_opt_ignored : Warning< - "CLI option %0 ignored as it can not be used without -offload-compress">, + "CLI option '%0' ignored as it can not be used without '-offload-compress'">, InGroup, DefaultIgnore; } diff --git a/clang/test/Driver/clang-offload-wrapper-zstd.c b/clang/test/Driver/clang-offload-wrapper-zstd.c new file mode 100644 index 0000000000000..96354c3905825 --- /dev/null +++ b/clang/test/Driver/clang-offload-wrapper-zstd.c @@ -0,0 +1,40 @@ +// REQUIRES: zstd && (system-windows || system-linux) + +// clang-offload-wrapper compression test: checks that the wrapper can compress the device images. +// Checks the '--offload-compress', '--offload-compression-level', and '--offload-compression-threshold' +// CLI options. + +// --- Prepare test data by creating the debice binary image. 
+// RUN: echo -e -n 'device binary image1\n' > %t.bin +// RUN: echo -e -n '[Category1]\nint_prop1=1|10\n[Category2]\nint_prop2=1|20\n' > %t.props +// RUN: echo -e -n 'kernel1\nkernel2\n' > %t.sym +// RUN: echo -e -n 'Manifest file - arbitrary data generated by the toolchain\n' > %t.mnf +// RUN: echo '[Code|Properties|Symbols|Manifest]' > %t.img1 +// RUN: echo %t.bin"|"%t.props"|"%t.sym"|"%t.mnf >> %t.img1 + +/////////////////////////////////////////////////////// +// Compress the test image using clang-offload-wrapper. +/////////////////////////////////////////////////////// + +// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ +// RUN: --offload-compress --offload-compression-level=9 --offload-compression-threshold=0 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS + +// CHECK-COMPRESS: [Compression] Original image size: +// CHECK-COMPRESS: [Compression] Compressed image size: +// CHECK-COMPRESS: [Compression] Compression level used: 9 + +/////////////////////////////////////////////////////////// +// Check that there is no compression when the threshold is set to a value higher than the image size +// or '--offload-compress' is not set. 
+/////////////////////////////////////////////////////////// + +// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ +// RUN: --offload-compress --offload-compression-level=3 --offload-compression-threshold=1000 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS + +// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ +// RUN: --offload-compression-level=3 --offload-compression-threshold=0 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS + +// CHECK-NO-COMPRESS-NOT: [Compression] Original image size: \ No newline at end of file diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index eaa6f6d785ecb..7d1fc238ed436 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -143,9 +143,8 @@ static cl::list Inputs(cl::Positional, cl::OneOrMore, cl::cat(ClangOffloadWrapperCategory)); // CLI options for device image compression. -// TODO: Turn off this option by default. static cl::opt OffloadCompressDevImgs( - "offload-compress", cl::init(true), cl::Optional, + "offload-compress", cl::init(false), cl::Optional, cl::desc("Enable device image compression using ZSTD."), cl::cat(ClangOffloadWrapperCategory)); @@ -155,6 +154,13 @@ static cl::opt cl::desc("ZSTD Compression level. Default: 10"), cl::cat(ClangOffloadWrapperCategory)); +static cl::opt + OffloadCompressThreshold("offload-compression-threshold", cl::init(512), + cl::Optional, + cl::desc("Threshold (in bytes) over which to " + "compress images. Default: 512"), + cl::cat(ClangOffloadWrapperCategory)); + // Binary image formats supported by this tool. The support basically means // mapping string representation given at the command line to a value from this // enum. No format checking is performed. 
@@ -1103,20 +1109,29 @@ class BinaryWrapper { Fbin = *FBinOrErr; } else { + // If '--offload-compress' option is specified and zstd is not available + // then warn the user that the image will not be compressed. + if (OffloadCompressDevImgs && !llvm::compression::zstd::isAvailable()) { + WithColor::warning(errs(), ToolName) + << "'--offload-compress' option is specified but zstd is not " + "available. The device image will not be compressed.\n"; + } + // Don't compress if the user explicitly specifies the binary image - // format or if the image is smaller than 512 bytes. + // format or if the image is smaller than OffloadCompressThreshold + // bytes. if (Kind != OffloadKind::SYCL || !OffloadCompressDevImgs || Img.Fmt != BinaryImageFormat::none || !llvm::compression::zstd::isAvailable() || - static_cast(Bin->getBufferSize()) < 512) { + static_cast(Bin->getBufferSize()) < OffloadCompressThreshold) { Fbin = addDeviceImageToModule( ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, Img.Tgt); } else { + // Compress the image using zstd. SmallVector CompressedBuffer; - llvm::compression::zstd::compress( ArrayRef( (const unsigned char *)(Bin->getBufferStart()), @@ -1124,11 +1139,14 @@ class BinaryWrapper { CompressedBuffer, OffloadCompressLevel); if (Verbose) - errs() << " Compression succeeded. Original image size:" - << Bin->getBufferSize() - << " Compressed image size:" << CompressedBuffer.size() - << "\n"; - + errs() << "[Compression] Original image size: " + << Bin->getBufferSize() << "\n" + << "[Compression] Compressed image size: " + << CompressedBuffer.size() << "\n" + << "[Compression] Compression level used: " + << OffloadCompressLevel << "\n"; + + // Add the compressed image to the module. 
Fbin = addDeviceImageToModule( ArrayRef((const char *)CompressedBuffer.data(), CompressedBuffer.size()), From 687db23182aea6b73fc14add7620a8137614372a Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 7 Sep 2024 15:48:49 -0700 Subject: [PATCH 26/50] Revert changes in llvm::compression namespace. --- clang/test/Driver/clang-offload-wrapper-zstd.c | 2 +- llvm/include/llvm/Support/Compression.h | 2 -- llvm/lib/Support/Compression.cpp | 7 ------- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/clang/test/Driver/clang-offload-wrapper-zstd.c b/clang/test/Driver/clang-offload-wrapper-zstd.c index 96354c3905825..bc5fadfc4cf42 100644 --- a/clang/test/Driver/clang-offload-wrapper-zstd.c +++ b/clang/test/Driver/clang-offload-wrapper-zstd.c @@ -37,4 +37,4 @@ // RUN: --offload-compression-level=3 --offload-compression-threshold=0 \ // RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS -// CHECK-NO-COMPRESS-NOT: [Compression] Original image size: \ No newline at end of file +// CHECK-NO-COMPRESS-NOT: [Compression] Original image size: diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h index 47614ae66385c..2a8da9e96d356 100644 --- a/llvm/include/llvm/Support/Compression.h +++ b/llvm/include/llvm/Support/Compression.h @@ -71,8 +71,6 @@ Error decompress(ArrayRef Input, uint8_t *Output, Error decompress(ArrayRef Input, SmallVectorImpl &Output, size_t UncompressedSize); -// Get the size of the decompressed data. 
-uint32_t getDecompressedSize(ArrayRef Input); } // End of namespace zstd enum class Format { diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index bcf40acdb99f4..badaf68ab59cd 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -224,10 +224,6 @@ Error zstd::decompress(ArrayRef Input, return E; } -uint32_t zstd::getDecompressedSize(ArrayRef Input) { - return ZSTD_getFrameContentSize(Input.data(), Input.size()); -} - #else bool zstd::isAvailable() { return false; } void zstd::compress(ArrayRef Input, @@ -244,7 +240,4 @@ Error zstd::decompress(ArrayRef Input, size_t UncompressedSize) { llvm_unreachable("zstd::decompress is unavailable"); } -uint32_t zstd::getDecompressedSize(ArrayRef Input) { - llvm_unreachable("zstd::decompress is unavailable"); -} #endif From ff53221c0e3f0511844fb38d4b3d50d3cc51e5c3 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 7 Sep 2024 16:49:30 -0700 Subject: [PATCH 27/50] Add unit test for (de)compression --- sycl/include/sycl/detail/compression.hpp | 1 + sycl/unittests/CMakeLists.txt | 1 + sycl/unittests/compression/CMakeLists.txt | 3 + .../compression/CompressionTests.cpp | 90 +++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 sycl/unittests/compression/CMakeLists.txt create mode 100644 sycl/unittests/compression/CompressionTests.cpp diff --git a/sycl/include/sycl/detail/compression.hpp b/sycl/include/sycl/detail/compression.hpp index fba53adec075b..cc0728cdc0a2f 100644 --- a/sycl/include/sycl/detail/compression.hpp +++ b/sycl/include/sycl/detail/compression.hpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #pragma once +#include #include #include diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index ec740f913ed4d..c672febd2c911 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -53,6 +53,7 @@ add_subdirectory(accessor) 
add_subdirectory(handler) add_subdirectory(builtins) add_subdirectory(buffer/l0_specific) +add_subdirectory(compression) # TODO Enable xpti tests for Windows if (NOT WIN32) add_subdirectory(xpti_trace) diff --git a/sycl/unittests/compression/CMakeLists.txt b/sycl/unittests/compression/CMakeLists.txt new file mode 100644 index 0000000000000..742e2d228072b --- /dev/null +++ b/sycl/unittests/compression/CMakeLists.txt @@ -0,0 +1,3 @@ +add_sycl_unittest(CompressionTests OBJECT + CompressionTests.cpp +) diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp new file mode 100644 index 0000000000000..b9620c044990d --- /dev/null +++ b/sycl/unittests/compression/CompressionTests.cpp @@ -0,0 +1,90 @@ +//==------- CompressionTests.cpp --- compression unit test ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include + +#include + +using namespace sycl::detail; + +TEST(CompressionTest, SimpleCompression) { + + // Data to compress. + std::string data = "Hello World! I'm about to get compressed :P"; + size_t compressedDataSize = 0; + + auto compressedData = ZSTDCompressor::CompressBlob( + data.c_str(), data.size(), compressedDataSize, /*Compression level*/ 3); + + // Check if compression was successful. + EXPECT_NE(compressedData, nullptr); + EXPECT_GT(compressedDataSize, 0); + + // Decompress the data. + size_t decompressedSize = 0; + auto decompressedData = ZSTDCompressor::DecompressBlob( + compressedData.get(), compressedDataSize, decompressedSize); + + ASSERT_NE(decompressedData, nullptr); + ASSERT_GT(decompressedSize, 0); + + // Check if decompressed data is same as original data. 
+ std::string decompressedStr((char *)decompressedData.get(), decompressedSize); + ASSERT_EQ(data, decompressedStr); + + // Check that error code is 0 after successful decompression. + int errorCode = ZSTDCompressor::GetLastError(); + ASSERT_EQ(errorCode, 0); + + // Check that error string is "No error detected" after successful + // decompression. + std::string errorString = ZSTDCompressor::GetErrorString(errorCode); + ASSERT_EQ(errorString, "No error detected"); +} + +// Test getting error code and error string. +// Intentionally give incorrect input to decompress +// to trigger an error. +TEST(CompressionTest, NegativeErrorTest) { + std::string input = "Hello, World!"; + size_t decompressedSize = 0; + auto compressedData = ZSTDCompressor::DecompressBlob( + input.c_str(), input.size(), decompressedSize); + + int errorCode = ZSTDCompressor::GetLastError(); + ASSERT_NE(errorCode, 0); + + std::string errorString = ZSTDCompressor::GetErrorString(errorCode); + ASSERT_NE(errorString, "No error detected"); +} + +// Test passing empty input to (de)compress. +// There should be no error and the output should be empty. 
+TEST(CompressionTest, EmptyInputTest) { + std::string input = ""; + size_t compressedSize = 0; + auto compressedData = ZSTDCompressor::CompressBlob( + input.c_str(), input.size(), compressedSize, 1); + + ASSERT_NE(compressedData, nullptr); + ASSERT_GT(compressedSize, 0); + ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); + + size_t decompressedSize = 0; + auto decompressedData = ZSTDCompressor::DecompressBlob( + compressedData.get(), compressedSize, decompressedSize); + + ASSERT_NE(decompressedData, nullptr); + ASSERT_EQ(decompressedSize, 0); + ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); + + std::string decompressedStr((char *)decompressedData.get(), decompressedSize); + ASSERT_EQ(input, decompressedStr); +} From 7fb726eec3528015cd66699c98f73e95eec36233 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 8 Sep 2024 10:33:36 -0700 Subject: [PATCH 28/50] Add E2E test for image compression --- sycl/test-e2e/Compression/compression.cpp | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 sycl/test-e2e/Compression/compression.cpp diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp new file mode 100644 index 0000000000000..cd618f5dc258c --- /dev/null +++ b/sycl/test-e2e/Compression/compression.cpp @@ -0,0 +1,24 @@ +// End-to-End test for testing device image compression. 
+// RUN: %{build} -O0 -g -o %t_not_compress.out +// RUN: %{build} -O0 -g --offload-compress -o %t_compress.out +// RUN: %{run} %t_not_compress.out +// RUN: %{run} %t_compress.out +// RUN: not diff %t_not_compress.out %t_compress.out + +#include + +int main() { + + sycl::queue q0; + int val = -1; + { + sycl::buffer buffer1(&val, sycl::range(1)); + + q0.submit([&](sycl::handler &cgh) { + auto acc = sycl::accessor(buffer1, cgh); + cgh.single_task([=] { acc[0] = acc[0] + 1; }); + }).wait(); + } + + return !(val == 0); +} \ No newline at end of file From 84f0864b599fbbd8ba8039ea524df550741561f4 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 8 Sep 2024 11:56:48 -0700 Subject: [PATCH 29/50] Update doc; Remove warning. --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 4 ---- clang/lib/Driver/ToolChains/Clang.cpp | 5 +---- sycl/doc/UsersManual.md | 13 +++++++++++++ sycl/test-e2e/Compression/compression.cpp | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 0276ece097945..b6e71082be66d 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -899,8 +899,4 @@ def err_drv_triple_version_invalid : Error< def warn_missing_include_dirs : Warning< "no such include directory: '%0'">, InGroup, DefaultIgnore; - -def warn_compress_opt_ignored : Warning< - "CLI option '%0' ignored as it can not be used without '-offload-compress'">, - InGroup, DefaultIgnore; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b2608276e8dda..bb8f2a302b073 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10102,10 +10102,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = C.getInputArgs().getLastArg( options::OPT_offload_compression_level_EQ)) { - if 
(!isImgCompress) - C.getDriver().Diag(diag::warn_compress_opt_ignored) - << A->getAsString(C.getInputArgs()); - else + if (isImgCompress) WrapperArgs.push_back(C.getArgs().MakeArgString( Twine("-offload-compression-level=") + A->getValue())); } diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 6a9e128825184..3f184edc12def 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -195,6 +195,19 @@ and not recommended to use in production environment. which may or may not perform additional inlining. Default value is 225. +**`--offload-compress`** + + Enables device image compression for SYCL offloading. Device images + are compressed using `zstd` compression algorithm and only if their size + exceeds 512 bytes. + Default value is false. + +**`--offload-compression-level=`** + + `zstd` compression level used to compress device images when `--offload- + compress` is enabled. + The default value is 10. + ## Target toolchain options **`-Xsycl-target-backend= "options"`** diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp index cd618f5dc258c..13f27ce9e4a69 100644 --- a/sycl/test-e2e/Compression/compression.cpp +++ b/sycl/test-e2e/Compression/compression.cpp @@ -1,6 +1,6 @@ // End-to-End test for testing device image compression. 
// RUN: %{build} -O0 -g -o %t_not_compress.out -// RUN: %{build} -O0 -g --offload-compress -o %t_compress.out +// RUN: %{build} -O0 -g --offload-compress --offload-compression-level=3 -o %t_compress.out // RUN: %{run} %t_not_compress.out // RUN: %{run} %t_compress.out // RUN: not diff %t_not_compress.out %t_compress.out From 7fdbd5e57e612e017f8cdecaae5a39a79ab5db23 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 8 Sep 2024 12:02:05 -0700 Subject: [PATCH 30/50] Add line at EOF --- sycl/test-e2e/Compression/compression.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp index 13f27ce9e4a69..d17b6fd293e69 100644 --- a/sycl/test-e2e/Compression/compression.cpp +++ b/sycl/test-e2e/Compression/compression.cpp @@ -21,4 +21,4 @@ int main() { } return !(val == 0); -} \ No newline at end of file +} From 312bd3889ebc2487693e9e9b256ade01330e20b0 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 8 Sep 2024 17:43:26 -0700 Subject: [PATCH 31/50] Remove std::move to allow copy ellision; Moved header to source/detail --- sycl/{include/sycl => source}/detail/compression.hpp | 4 ++-- sycl/source/detail/device_binary_image.cpp | 6 +++--- sycl/source/detail/program_manager/program_manager.cpp | 4 ++-- sycl/unittests/compression/CompressionTests.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) rename sycl/{include/sycl => source}/detail/compression.hpp (98%) diff --git a/sycl/include/sycl/detail/compression.hpp b/sycl/source/detail/compression.hpp similarity index 98% rename from sycl/include/sycl/detail/compression.hpp rename to sycl/source/detail/compression.hpp index cc0728cdc0a2f..c106b7861ada1 100644 --- a/sycl/include/sycl/detail/compression.hpp +++ b/sycl/source/detail/compression.hpp @@ -82,7 +82,7 @@ class ZSTDCompressor { instance.m_lastError = 0; // Pass ownership of the buffer to the caller. 
- return std::move(dstBuffer); + return dstBuffer; } static std::unique_ptr @@ -120,7 +120,7 @@ class ZSTDCompressor { } // Pass ownership of the buffer to the caller. - return std::move(dstBuffer); + return dstBuffer; } // Data fields diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index fd83f51400ede..5259f65234d76 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -10,7 +10,7 @@ #include // For device image compression. -#include +#include #include #include @@ -239,9 +239,9 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( CompressedBin->BinaryStart); size_t DecompressedSize = 0; - m_DecompressedData = std::move(ZSTDCompressor::DecompressBlob( + m_DecompressedData = ZSTDCompressor::DecompressBlob( reinterpret_cast(CompressedBin->BinaryStart), - compressedDataSize, DecompressedSize)); + compressedDataSize, DecompressedSize); if (!m_DecompressedData) { throw sycl::exception( diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a99c7c368d77e..6ab21fc3b4085 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1653,9 +1653,9 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { std::unique_ptr Img; if (isDeviceImageCompressed(RawImg)) - Img = std::move(std::make_unique(RawImg)); + Img = std::make_unique(RawImg); else - Img = std::move(std::make_unique(RawImg)); + Img = std::make_unique(RawImg); static uint32_t SequenceID = 0; diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp index b9620c044990d..77a577cec2a2f 100644 --- a/sycl/unittests/compression/CompressionTests.cpp +++ b/sycl/unittests/compression/CompressionTests.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -#include +#include #include From 75f28acbcfd3686ac6477134250efc4c9ef06bb5 Mon Sep 17 00:00:00 2001 From: Udit Agarwal Date: Mon, 9 Sep 2024 08:22:56 -0700 Subject: [PATCH 32/50] Simplify passing argument from clang driver to clang-offload-wrapper Co-authored-by: Michael Toguchi --- clang/lib/Driver/ToolChains/Clang.cpp | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bb8f2a302b073..fad60678760b7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10089,23 +10089,15 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, bool FPGAEarly = false; // Validate and propogate CLI options related to device image compression. - { - // -offload-compress - bool isImgCompress = false; - if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { - isImgCompress = true; - WrapperArgs.push_back( + // -offload-compress + if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { + WrapperArgs.push_back( C.getArgs().MakeArgString(Twine("-offload-compress"))); - } - // -offload-compression-level=<> if (Arg *A = C.getInputArgs().getLastArg( - options::OPT_offload_compression_level_EQ)) { - - if (isImgCompress) - WrapperArgs.push_back(C.getArgs().MakeArgString( - Twine("-offload-compression-level=") + A->getValue())); - } + options::OPT_offload_compression_level_EQ)) + WrapperArgs.push_back(C.getArgs().MakeArgString( + Twine("-offload-compression-level=") + A->getValue())); } if (Arg *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ)) { From 288ed5b316a43b4a3fc68d858efa1d4e48e70e50 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 10 Sep 2024 08:47:46 -0700 Subject: [PATCH 33/50] Fix formatting --- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fad60678760b7..f731e03ef2eff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10092,7 +10092,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, // -offload-compress if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { WrapperArgs.push_back( - C.getArgs().MakeArgString(Twine("-offload-compress"))); + C.getArgs().MakeArgString(Twine("-offload-compress"))); // -offload-compression-level=<> if (Arg *A = C.getInputArgs().getLastArg( options::OPT_offload_compression_level_EQ)) From 7262fd1275f63fbeaaed398954eebb8cd18edb72 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 11 Sep 2024 12:48:28 -0700 Subject: [PATCH 34/50] Address reviews --- .../ClangOffloadWrapper.cpp | 2 +- sycl/source/CMakeLists.txt | 10 +- sycl/source/detail/compression.hpp | 120 ++++++++++-------- sycl/source/detail/device_binary_image.cpp | 40 +++--- sycl/source/detail/device_binary_image.hpp | 4 +- .../program_manager/program_manager.cpp | 22 ++-- .../program_manager/program_manager.hpp | 3 - .../compression/CompressionTests.cpp | 28 ++-- 8 files changed, 117 insertions(+), 112 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 7d1fc238ed436..ef3d4fc372da3 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1153,7 +1153,7 @@ class BinaryWrapper { Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, Img.Tgt); - // Change image format to compressed_non. + // Change image format to compressed_none. 
Ffmt = ConstantInt::get(Type::getInt8Ty(C), BinaryImageFormat::compressed_none); } diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index f16d6899de285..f432203836855 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -74,9 +74,13 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) set(CMAKE_FIND_DEBUG_MODE 1) # Need zstd for device image compression. - find_package(zstd REQUIRED) - target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) - target_include_directories(${LIB_OBJ_NAME} PRIVATE ${zstd_INCLUDE_DIR}) + find_package(zstd) + if (NOT zstd_FOUND) + target_compile_definitions(${LIB_OBJ_NAME} PRIVATE SYCL_RT_ZSTD_NOT_AVAIABLE) + else() + target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) + target_include_directories(${LIB_OBJ_NAME} PRIVATE ${zstd_INCLUDE_DIR}) + endif() target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp index c106b7861ada1..fe997d08159d8 100644 --- a/sycl/source/detail/compression.hpp +++ b/sycl/source/detail/compression.hpp @@ -7,6 +7,10 @@ //===----------------------------------------------------------------------===// #pragma once +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE + +#include + #include #include #include @@ -21,23 +25,7 @@ namespace detail { // Singleton class to handle ZSTD compression and decompression. class ZSTDCompressor { private: - // Initialize ZSTD context and error code. - ZSTDCompressor() { - m_ZSTD_compression_ctx = static_cast(ZSTD_createCCtx()); - m_ZSTD_decompression_ctx = static_cast(ZSTD_createDCtx()); - - if (!m_ZSTD_compression_ctx || !m_ZSTD_decompression_ctx) { - std::cerr << "Error creating ZSTD contexts. \n"; - } - - m_lastError = 0; - } - - // Free ZSTD contexts. 
- ~ZSTDCompressor() { - ZSTD_freeCCtx(static_cast(m_ZSTD_compression_ctx)); - ZSTD_freeDCtx(static_cast(m_ZSTD_decompression_ctx)); - } + ZSTDCompressor() {} ZSTDCompressor(const ZSTDCompressor &) = delete; ZSTDCompressor &operator=(const ZSTDCompressor &) = delete; @@ -50,73 +38,93 @@ class ZSTDCompressor { // Public APIs public: - // Return 0 is last (de)compression was successful, otherwise return error - // code. - static int GetLastError() { return GetSingletonInstance().m_lastError; } - - // Returns a string representation of the error code. - // If the error code is 0, it returns "No error detected". - static std::string GetErrorString(int code) { - return ZSTD_getErrorName(code); - } - // Blob (de)compression do not assume format/structure of the input buffer. static std::unique_ptr CompressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { auto &instance = GetSingletonInstance(); + // Lazy initialize compression context. + if (!instance.m_ZSTD_compression_ctx) { + + // Call ZSTD_createCCtx() and ZSTD_freeCCtx() to create and free the + // context. + instance.m_ZSTD_compression_ctx = + std::unique_ptr(ZSTD_createCCtx(), + ZSTD_freeCCtx); + if (!instance.m_ZSTD_compression_ctx) { + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to create ZSTD compression context"); + } + } + // Get maximum size of the compressed buffer and allocate it. auto dstBufferSize = ZSTD_compressBound(srcSize); auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); + if (!dstBuffer) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to allocate memory for compressed data"); + // Compress the input buffer. 
- dstSize = ZSTD_compressCCtx( - static_cast(instance.m_ZSTD_compression_ctx), - static_cast(dstBuffer.get()), dstBufferSize, - static_cast(src), srcSize, level); + dstSize = + ZSTD_compressCCtx(instance.m_ZSTD_compression_ctx.get(), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize, level); // Store the error code if compression failed. if (ZSTD_isError(dstSize)) - instance.m_lastError = dstSize; - else - instance.m_lastError = 0; + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + ZSTD_getErrorName(dstSize)); // Pass ownership of the buffer to the caller. return dstBuffer; } - static std::unique_ptr - DecompressBlob(const char *src, size_t srcSize, size_t &dstSize) { + static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, + size_t &dstSize) { auto &instance = GetSingletonInstance(); + // Lazy initialize decompression context. + if (!instance.m_ZSTD_decompression_ctx) { + + // Call ZSTD_createDCtx() and ZSTD_freeDCtx() to create and free the + // context. + instance.m_ZSTD_decompression_ctx = + std::unique_ptr(ZSTD_createDCtx(), + ZSTD_freeDCtx); + if (!instance.m_ZSTD_decompression_ctx) { + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to create ZSTD decompression context"); + } + } + // Size of decompressed image can be larger than what we can allocate // on heap. In that case, we need to use streaming decompression. - // TODO: Throw if the decompression size is too large. auto dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { - - std::cerr << "Error determining size of uncompressed data\n"; - dstSize = 0; - instance.m_lastError = dstBufferSize; - return nullptr; + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Error determining size of uncompressed data."); } // Allocate buffer for decompressed data. 
- auto dstBuffer = - std::unique_ptr(new unsigned char[dstBufferSize]); + auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); - dstSize = ZSTD_decompressDCtx( - static_cast(instance.m_ZSTD_decompression_ctx), - static_cast(dstBuffer.get()), dstBufferSize, - static_cast(src), srcSize); + if (!dstBuffer) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to allocate memory for decompressed data"); + + dstSize = + ZSTD_decompressDCtx(instance.m_ZSTD_decompression_ctx.get(), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize); // In case of decompression error, return the error message and set dstSize // to 0. if (ZSTD_isError(dstSize)) { - instance.m_lastError = dstSize; - dstSize = 0; + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + ZSTD_getErrorName(dstSize)); } // Pass ownership of the buffer to the caller. @@ -125,12 +133,14 @@ class ZSTDCompressor { // Data fields private: - int m_lastError; - // ZSTD context. Reusing ZSTD context speeds up subsequent (de)compression. - // Storing as void* to avoid including ZSTD headers in this file. - void *m_ZSTD_compression_ctx; - void *m_ZSTD_decompression_ctx; + // ZSTD contexts. Reusing ZSTD context speeds up subsequent (de)compression. 
+ std::unique_ptr m_ZSTD_compression_ctx{ + nullptr, nullptr}; + std::unique_ptr m_ZSTD_decompression_ctx{ + nullptr, nullptr}; }; } // namespace detail } // namespace _V1 } // namespace sycl + +#endif // SYCL_RT_ZSTD_NOT_AVAIABLE \ No newline at end of file diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 5259f65234d76..e0815f16c9a2f 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -172,28 +172,27 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) // try to determine the format; may remain "NONE" - Format = ur::getBinaryImageFormat(this->Bin->BinaryStart, getSize()); + Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); - SpecConstIDMap.init(this->Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); + SpecConstIDMap.init(Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); SpecConstDefaultValuesMap.init( - this->Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); - DeviceLibReqMask.init(this->Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); - KernelParamOptInfo.init(this->Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); - AssertUsed.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); - ProgramMetadata.init(this->Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA); + Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); + DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); + KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); + AssertUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); + ProgramMetadata.init(Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA); // Convert ProgramMetadata into the UR format for (const auto &Prop : ProgramMetadata) { ProgramMetadataUR.push_back( ur::mapDeviceBinaryPropertyToProgramMetadata(Prop)); } - ExportedSymbols.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); - ImportedSymbols.init(this->Bin, 
__SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS); - DeviceGlobals.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS); - DeviceRequirements.init(this->Bin, - __SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS); - HostPipes.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_HOST_PIPES); - VirtualFunctions.init(this->Bin, __SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS); + ExportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); + ImportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS); + DeviceGlobals.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS); + DeviceRequirements.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS); + HostPipes.init(Bin, __SYCL_PROPERTY_SET_SYCL_HOST_PIPES); + VirtualFunctions.init(Bin, __SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS); ImageId = ImageCounter++; } @@ -231,6 +230,7 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { Bin = nullptr; } +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) : RTDeviceBinaryImage() { @@ -243,18 +243,13 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( reinterpret_cast(CompressedBin->BinaryStart), compressedDataSize, DecompressedSize); - if (!m_DecompressedData) { - throw sycl::exception( - sycl::make_error_code(sycl::errc::runtime), - "Failed to decompress device binary image. " + - ZSTDCompressor::GetErrorString(ZSTDCompressor::GetLastError())); - } - Bin = new sycl_device_binary_struct(*CompressedBin); - Bin->BinaryStart = m_DecompressedData.get(); + Bin->BinaryStart = (const unsigned char *)(m_DecompressedData.get()); Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; // Set the new format to none and let RT determine the format. + // TODO: Add support for automatically detecting compressed + // binary format. 
Bin->Format = SYCL_DEVICE_BINARY_TYPE_NONE; init(Bin); @@ -265,6 +260,7 @@ CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { delete Bin; Bin = nullptr; } +#endif // SYCL_RT_ZSTD_NOT_AVAIABLE } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index acf3265b0099b..62dc0afce90fd 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -276,6 +276,7 @@ class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { std::unique_ptr Data; }; +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE // Compressed device binary image. It decompresses the binary image on // construction and stores the decompressed data as RTDeviceBinaryImage. // Also, frees the decompressed data in destructor. @@ -290,8 +291,9 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { } private: - std::unique_ptr m_DecompressedData; + std::unique_ptr m_DecompressedData; }; +#endif // SYCL_RT_ZSTD_NOT_AVAIABLE } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 498b104796fb3..e94eabfa86eac 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1529,6 +1529,13 @@ getDeviceLibPrograms(const ContextImplPtr Context, return Programs; } +// Check if device image is compressed. 
+static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { + + auto currFormat = static_cast(Bin->Format); + return currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE; +} + ProgramManager::ProgramPtr ProgramManager::build( ProgramPtr Program, const ContextImplPtr Context, const std::string &CompileOptions, const std::string &LinkOptions, @@ -1660,7 +1667,14 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { std::unique_ptr Img; if (isDeviceImageCompressed(RawImg)) +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE Img = std::make_unique(RawImg); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Recieved a compressed device image, but " + "SYCL RT was built without ZSTD support." + "Aborting. "); +#endif else Img = std::make_unique(RawImg); @@ -2808,14 +2822,6 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( return UrKernel; } -// Check if device image is compressed. -inline bool -ProgramManager::isDeviceImageCompressed(sycl_device_binary Bin) const { - - auto currFormat = static_cast(Bin->Format); - return currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE; -} - bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img) { return !checkDevSupportDeviceRequirements(Dev, Img).has_value(); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index c1a6a208417c3..c38cc7babd370 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -320,9 +320,6 @@ class ProgramManager { collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, device Dev); - // Returns whether the device image is compressed or not. - inline bool isDeviceImageCompressed(sycl_device_binary Bin) const; - /// The three maps below are used during kernel resolution. Any kernel is /// identified by its name. 
using RTDeviceBinaryImageUPtr = std::unique_ptr; diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp index 77a577cec2a2f..0114cfeac5551 100644 --- a/sycl/unittests/compression/CompressionTests.cpp +++ b/sycl/unittests/compression/CompressionTests.cpp @@ -38,15 +38,6 @@ TEST(CompressionTest, SimpleCompression) { // Check if decompressed data is same as original data. std::string decompressedStr((char *)decompressedData.get(), decompressedSize); ASSERT_EQ(data, decompressedStr); - - // Check that error code is 0 after successful decompression. - int errorCode = ZSTDCompressor::GetLastError(); - ASSERT_EQ(errorCode, 0); - - // Check that error string is "No error detected" after successful - // decompression. - std::string errorString = ZSTDCompressor::GetErrorString(errorCode); - ASSERT_EQ(errorString, "No error detected"); } // Test getting error code and error string. @@ -55,14 +46,15 @@ TEST(CompressionTest, SimpleCompression) { TEST(CompressionTest, NegativeErrorTest) { std::string input = "Hello, World!"; size_t decompressedSize = 0; - auto compressedData = ZSTDCompressor::DecompressBlob( - input.c_str(), input.size(), decompressedSize); - - int errorCode = ZSTDCompressor::GetLastError(); - ASSERT_NE(errorCode, 0); - - std::string errorString = ZSTDCompressor::GetErrorString(errorCode); - ASSERT_NE(errorString, "No error detected"); + bool threwException = false; + try { + auto compressedData = ZSTDCompressor::DecompressBlob( + input.c_str(), input.size(), decompressedSize); + } catch (sycl::exception &e) { + threwException = true; + } + + ASSERT_TRUE(threwException); } // Test passing empty input to (de)compress. 
@@ -75,7 +67,6 @@ TEST(CompressionTest, EmptyInputTest) { ASSERT_NE(compressedData, nullptr); ASSERT_GT(compressedSize, 0); - ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); size_t decompressedSize = 0; auto decompressedData = ZSTDCompressor::DecompressBlob( @@ -83,7 +74,6 @@ TEST(CompressionTest, EmptyInputTest) { ASSERT_NE(decompressedData, nullptr); ASSERT_EQ(decompressedSize, 0); - ASSERT_EQ(ZSTDCompressor::GetLastError(), 0); std::string decompressedStr((char *)decompressedData.get(), decompressedSize); ASSERT_EQ(input, decompressedStr); From 6ff1a3764fd9c60c2689238537329be4bb3942cd Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 12 Sep 2024 10:11:32 -0700 Subject: [PATCH 35/50] Add E2E tests for image compression. --- sycl/CMakeLists.txt | 8 +++ sycl/source/CMakeLists.txt | 4 +- .../Compression/Inputs/single_kernel.cpp | 17 +++++ sycl/test-e2e/Compression/compression.cpp | 23 +----- sycl/test-e2e/Compression/compression_aot.cpp | 5 ++ .../Compression/compression_multiple_tu.cpp | 68 ++++++++++++++++++ .../compression_seperate_compile.cpp | 70 +++++++++++++++++++ sycl/test-e2e/Compression/no_zstd_warning.cpp | 5 ++ sycl/test-e2e/lit.cfg.py | 3 + sycl/test-e2e/lit.site.cfg.py.in | 2 + sycl/unittests/CMakeLists.txt | 7 +- .../compression/CompressionTests.cpp | 8 +-- 12 files changed, 192 insertions(+), 28 deletions(-) create mode 100644 sycl/test-e2e/Compression/Inputs/single_kernel.cpp create mode 100644 sycl/test-e2e/Compression/compression_aot.cpp create mode 100644 sycl/test-e2e/Compression/compression_multiple_tu.cpp create mode 100644 sycl/test-e2e/Compression/compression_seperate_compile.cpp create mode 100644 sycl/test-e2e/Compression/no_zstd_warning.cpp diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 3c529e7e5aeb2..1f78b215fbc3f 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -329,6 +329,14 @@ if (LLVM_ENABLE_ASSERTIONS AND NOT SYCL_DISABLE_STL_ASSERTIONS AND NOT WIN32) endif() endif() +# Need zstd for device image 
compression. +find_package(zstd) +if (zstd_FOUND) +set(SYCL_ZSTD_AVAILABLE ON) +else() +set(SYCL_ZSTD_AVAILABLE OFF) +endif() + set(SYCL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # SYCL runtime library diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index f432203836855..510acc7165db8 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -73,9 +73,7 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) # being found on Windows CI machines. set(CMAKE_FIND_DEBUG_MODE 1) - # Need zstd for device image compression. - find_package(zstd) - if (NOT zstd_FOUND) + if (NOT SYCL_ZSTD_AVAILABLE) target_compile_definitions(${LIB_OBJ_NAME} PRIVATE SYCL_RT_ZSTD_NOT_AVAIABLE) else() target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) diff --git a/sycl/test-e2e/Compression/Inputs/single_kernel.cpp b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp new file mode 100644 index 0000000000000..eac8a63438f85 --- /dev/null +++ b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp @@ -0,0 +1,17 @@ +#include + +int main() { + + sycl::queue q0; + int val = -1; + { + sycl::buffer buffer1(&val, sycl::range(1)); + + q0.submit([&](sycl::handler &cgh) { + auto acc = sycl::accessor(buffer1, cgh); + cgh.single_task([=] { acc[0] = acc[0] + 1; }); + }).wait(); + } + + return !(val == 0); +} diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp index d17b6fd293e69..1d8da7abc9d49 100644 --- a/sycl/test-e2e/Compression/compression.cpp +++ b/sycl/test-e2e/Compression/compression.cpp @@ -1,24 +1,7 @@ // End-to-End test for testing device image compression. 
-// RUN: %{build} -O0 -g -o %t_not_compress.out -// RUN: %{build} -O0 -g --offload-compress --offload-compression-level=3 -o %t_compress.out +// REQUIRES: zstd +// RUN: %{build} -O0 -g %S/Inputs/single_kernel.cpp -o %t_not_compress.out +// RUN: %{build} -O0 -g --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out // RUN: %{run} %t_not_compress.out // RUN: %{run} %t_compress.out // RUN: not diff %t_not_compress.out %t_compress.out - -#include - -int main() { - - sycl::queue q0; - int val = -1; - { - sycl::buffer buffer1(&val, sycl::range(1)); - - q0.submit([&](sycl::handler &cgh) { - auto acc = sycl::accessor(buffer1, cgh); - cgh.single_task([=] { acc[0] = acc[0] + 1; }); - }).wait(); - } - - return !(val == 0); -} diff --git a/sycl/test-e2e/Compression/compression_aot.cpp b/sycl/test-e2e/Compression/compression_aot.cpp new file mode 100644 index 0000000000000..5b44b6a41e9ce --- /dev/null +++ b/sycl/test-e2e/Compression/compression_aot.cpp @@ -0,0 +1,5 @@ +// End-to-End test for testing device image compression in AOT. +// REQUIRES: zstd, opencl-aot, cpu + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -O0 --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out +// RUN: %{run} %t_compress.out diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp new file mode 100644 index 0000000000000..27b71e47cfdef --- /dev/null +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -0,0 +1,68 @@ +// End-to-End test for testing device image compression when we have two +// translation units, one compressed and one not compressed. 
+// REQUIRES: zstd + +// RUN: %clangxx --offload-compress -DENABLE_KERNEL1 -fsycl -O0 -shared -fPIC %s -o %t_kernel1.so +// RUN: %clangxx -DENABLE_KERNEL2 -fsycl -O0 -shared -fPIC %s -o %t_kernel2.so + +// RUN: %clangxx -fsycl %t_kernel1.so %t_kernel2.so %s -Wl,-rpath=%T -o %t_compress.out +// RUN: %{run} %t_compress.out +#if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2) +#include + +using namespace sycl; + +class TestFnObj { +public: + TestFnObj(buffer &buf, handler &cgh) + : data(buf.get_access(cgh)) {} + accessor data; + void operator()(id<1> item) const { data[item] = item[0]; } +}; +#endif + +void kernel1(); +void kernel2(); + +#ifdef ENABLE_KERNEL1 +void kernel1() { + static int data[10]; + { + buffer b(data, range<1>(10)); + queue q; + q.submit([&](sycl::handler &cgh) { + TestFnObj kernel(b, cgh); + cgh.parallel_for(range<1>(10), kernel); + }); + } + for (int i = 0; i < 10; i++) { + assert(data[i] == i); + } +} +#endif + +#ifdef ENABLE_KERNEL2 +void kernel2() { + static int data[256]; + { + buffer b(data, range<1>(256)); + queue q; + q.submit([&](handler &cgh) { + TestFnObj kernel(b, cgh); + cgh.parallel_for(range<1>(256), kernel); + }); + } + for (int i = 0; i < 256; i++) { + assert(data[i] == i); + } +} +#endif + +#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2) +int main() { + kernel1(); + kernel2(); + + return 0; +} +#endif diff --git a/sycl/test-e2e/Compression/compression_seperate_compile.cpp b/sycl/test-e2e/Compression/compression_seperate_compile.cpp new file mode 100644 index 0000000000000..178d1930af31d --- /dev/null +++ b/sycl/test-e2e/Compression/compression_seperate_compile.cpp @@ -0,0 +1,70 @@ +// End-to-End test for testing device image compression when we +// seperatly compile and link device images. 
+ +// REQUIRES: zstd, opencl-aot, cpu, linux + +////////////////////// Compile device images +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=g++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL1' -DENABLE_KERNEL1 -c %s -o %t_kernel1_aot.o +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=g++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL2' -DENABLE_KERNEL2 -c %s -o %t_kernel2_aot.o + +////////////////////// Link device images +// RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v + +////////////////////// Compile the host program +// RUN: g++ -I%T/../../../../../include -I%T/../../../../../include/sycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o + +////////////////////// Link the host program and compressed device images +// RUN: g++ %t_main.o %t_kernel1_aot.o %t_kernel2_aot.o %t_compressed_image.o -L%T/../../../../../lib -lsycl -o %t_compress.out + +// RUN: %{run} %t_compress.out + +#include + +using namespace sycl; + +// Kernel 1 +#ifdef ENABLE_KERNEL1 +class test_kernel1; +void run_kernel1(int *a, queue q) { + q.single_task([=]() { *a *= 3; }); +} +#endif + +// Kernel 2 +#ifdef ENABLE_KERNEL2 +class test_kernel2; +void run_kernel2(int *a, queue q) { + q.single_task([=]() { *a += 42; }); +} +#endif + +// Main application. 
+#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2) +#include +#include + +#include + +class kernel_init; +void run_kernel1(int *a, queue q); +void run_kernel2(int *a, queue q); +int main() { + int retCode = 0; + queue q(cpu_selector_v, {property::queue::in_order()}); + + if (!q.get_device().get_info()) + return 0; + + int *p = malloc_shared(1, q); + *p = 42; + + run_kernel1(p, q); + run_kernel2(p, q); + q.wait(); + + retCode = *p != (42 * 3 + 42); + + free(p, q); + return retCode; +} +#endif diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp new file mode 100644 index 0000000000000..635ed96acd3dc --- /dev/null +++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp @@ -0,0 +1,5 @@ +// Test to check warnings when using --offload-compress without zstd. +// REQUIRES: !zstd +// RUN: %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s + +// CHECK: warning: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed. 
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index bbef264be9c24..8bd1c0c4cc336 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -594,6 +594,9 @@ def open_check_file(file_name): if lit_config.params.get("ze_debug"): config.available_features.add("ze_debug") +if config.zstd_found and config.zstd_found == 'TRUE': + config.available_features.add("zstd") + if config.run_launcher: config.substitutions.append(("%e2e_tests_root", config.test_source_root)) diff --git a/sycl/test-e2e/lit.site.cfg.py.in b/sycl/test-e2e/lit.site.cfg.py.in index dfa5003c1a2f4..b67d84c3a6146 100644 --- a/sycl/test-e2e/lit.site.cfg.py.in +++ b/sycl/test-e2e/lit.site.cfg.py.in @@ -44,6 +44,8 @@ config.vulkan_include_dir = "@Vulkan_INCLUDE_DIRS@" config.vulkan_lib = "@Vulkan_LIBRARY@" config.vulkan_found = "@Vulkan_FOUND@" +config.zstd_found = "@zstd_FOUND@" + config.run_launcher = lit_config.params.get('run_launcher', "@SYCL_E2E_RUN_LAUNCHER@") config.allow_unknown_arch = "@SYCL_E2E_LIT_ALLOW_UNKNOWN_ARCH@" diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index c672febd2c911..09a279821cb54 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -53,7 +53,12 @@ add_subdirectory(accessor) add_subdirectory(handler) add_subdirectory(builtins) add_subdirectory(buffer/l0_specific) -add_subdirectory(compression) + +# Enable compression unit-tests only if zstd is present. +if (SYCL_ZSTD_AVAILABLE) + add_subdirectory(compression) +endif() + # TODO Enable xpti tests for Windows if (NOT WIN32) add_subdirectory(xpti_trace) diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp index 0114cfeac5551..7f91c9ad31b66 100644 --- a/sycl/unittests/compression/CompressionTests.cpp +++ b/sycl/unittests/compression/CompressionTests.cpp @@ -25,7 +25,7 @@ TEST(CompressionTest, SimpleCompression) { // Check if compression was successful. 
EXPECT_NE(compressedData, nullptr); - EXPECT_GT(compressedDataSize, 0); + EXPECT_GT(compressedDataSize, (size_t)0); // Decompress the data. size_t decompressedSize = 0; @@ -33,7 +33,7 @@ TEST(CompressionTest, SimpleCompression) { compressedData.get(), compressedDataSize, decompressedSize); ASSERT_NE(decompressedData, nullptr); - ASSERT_GT(decompressedSize, 0); + ASSERT_GT(decompressedSize, (size_t)0); // Check if decompressed data is same as original data. std::string decompressedStr((char *)decompressedData.get(), decompressedSize); @@ -66,14 +66,14 @@ TEST(CompressionTest, EmptyInputTest) { input.c_str(), input.size(), compressedSize, 1); ASSERT_NE(compressedData, nullptr); - ASSERT_GT(compressedSize, 0); + ASSERT_GT(compressedSize, (size_t)0); size_t decompressedSize = 0; auto decompressedData = ZSTDCompressor::DecompressBlob( compressedData.get(), compressedSize, decompressedSize); ASSERT_NE(decompressedData, nullptr); - ASSERT_EQ(decompressedSize, 0); + ASSERT_EQ(decompressedSize, (size_t)0); std::string decompressedStr((char *)decompressedData.get(), decompressedSize); ASSERT_EQ(input, decompressedStr); From 022a7ef5dc443be9fd3efd5f9e20d6848a1bdbcf Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 12 Sep 2024 10:35:55 -0700 Subject: [PATCH 36/50] Remove pending TODOs; Apply clang-format --- sycl/source/detail/compression.hpp | 3 ++- sycl/test-e2e/lit.cfg.py | 2 +- sycl/test/lit.cfg.py | 2 -- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp index fe997d08159d8..56894a4de0dd1 100644 --- a/sycl/source/detail/compression.hpp +++ b/sycl/source/detail/compression.hpp @@ -143,4 +143,5 @@ class ZSTDCompressor { } // namespace _V1 } // namespace sycl -#endif // SYCL_RT_ZSTD_NOT_AVAIABLE \ No newline at end of file +#endif // SYCL_RT_ZSTD_NOT_AVAIABLE + diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 8bd1c0c4cc336..12aba3248c45a 100644 --- 
a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -594,7 +594,7 @@ def open_check_file(file_name): if lit_config.params.get("ze_debug"): config.available_features.add("ze_debug") -if config.zstd_found and config.zstd_found == 'TRUE': +if config.zstd_found and config.zstd_found == "TRUE": config.available_features.add("zstd") if config.run_launcher: diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 3205e54d236ba..788d9ab37a9e3 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -143,8 +143,6 @@ config.substitutions.append(("%sycl_triple", triple)) additional_flags = config.sycl_clang_extra_flags.split(" ") -# TODO: Remove this. -additional_flags.append("--offload-compress") if config.cuda == "ON": config.available_features.add("cuda") From 2eb0c25680be2597bb0e5a0d00ec4e1f0f3da5e0 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 12 Sep 2024 18:48:23 -0700 Subject: [PATCH 37/50] Reuse LLVM_ENABLE_ZSTD --- buildbot/configure.py | 1 + sycl/CMakeLists.txt | 8 -------- sycl/source/CMakeLists.txt | 2 +- sycl/test-e2e/CMakeLists.txt | 6 ++++++ sycl/unittests/CMakeLists.txt | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/buildbot/configure.py b/buildbot/configure.py index 3f55da1769965..e086e1b09c421 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -179,6 +179,7 @@ def do_configure(args): "-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform), "-DLLVM_BUILD_TOOLS=ON", "-DLLVM_ENABLE_ZSTD=ON", + "-DLLVM_USE_STATIC_ZSTD=ON", "-DSYCL_ENABLE_WERROR={}".format(sycl_werror), "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. 
diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 1f78b215fbc3f..3c529e7e5aeb2 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -329,14 +329,6 @@ if (LLVM_ENABLE_ASSERTIONS AND NOT SYCL_DISABLE_STL_ASSERTIONS AND NOT WIN32) endif() endif() -# Need zstd for device image compression. -find_package(zstd) -if (zstd_FOUND) -set(SYCL_ZSTD_AVAILABLE ON) -else() -set(SYCL_ZSTD_AVAILABLE OFF) -endif() - set(SYCL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # SYCL runtime library diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 510acc7165db8..ed6db74fdd697 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -73,7 +73,7 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) # being found on Windows CI machines. set(CMAKE_FIND_DEBUG_MODE 1) - if (NOT SYCL_ZSTD_AVAILABLE) + if (NOT LLVM_ENABLE_ZSTD) target_compile_definitions(${LIB_OBJ_NAME} PRIVATE SYCL_RT_ZSTD_NOT_AVAIABLE) else() target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) diff --git a/sycl/test-e2e/CMakeLists.txt b/sycl/test-e2e/CMakeLists.txt index 2379d7859e6a5..db9f60c0faabe 100644 --- a/sycl/test-e2e/CMakeLists.txt +++ b/sycl/test-e2e/CMakeLists.txt @@ -83,6 +83,12 @@ if(NOT SYCL_TEST_E2E_STANDALONE) ) endif() # Standalone. +if(SYCL_TEST_E2E_STANDALONE) + if( NOT zstd_FOUND ) + find_package(zstd) + endif() +endif() # Standalone. + add_custom_target(check-sycl-e2e COMMAND ${Python3_EXECUTABLE} ${LLVM_LIT} ${SYCL_E2E_TESTS_LIT_FLAGS} . COMMENT "Running SYCL End-to-End tests" diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index 09a279821cb54..0d8b2ac283c17 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -55,7 +55,7 @@ add_subdirectory(builtins) add_subdirectory(buffer/l0_specific) # Enable compression unit-tests only if zstd is present. 
-if (SYCL_ZSTD_AVAILABLE) +if (LLVM_ENABLE_ZSTD) add_subdirectory(compression) endif() From 44b41dd9fcc974c802c3cbde0932f7d84db03a8a Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 13 Sep 2024 10:02:42 -0700 Subject: [PATCH 38/50] Find zstd package when we build E2E tests seperatly from the compiler --- sycl/test-e2e/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sycl/test-e2e/CMakeLists.txt b/sycl/test-e2e/CMakeLists.txt index db9f60c0faabe..6c1ce9208269c 100644 --- a/sycl/test-e2e/CMakeLists.txt +++ b/sycl/test-e2e/CMakeLists.txt @@ -83,11 +83,7 @@ if(NOT SYCL_TEST_E2E_STANDALONE) ) endif() # Standalone. -if(SYCL_TEST_E2E_STANDALONE) - if( NOT zstd_FOUND ) - find_package(zstd) - endif() -endif() # Standalone. +find_package(zstd) add_custom_target(check-sycl-e2e COMMAND ${Python3_EXECUTABLE} ${LLVM_LIT} ${SYCL_E2E_TESTS_LIT_FLAGS} . From 1d8181335fb188aa4ae0ad39b3826a4162b200d2 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 13 Sep 2024 13:17:55 -0700 Subject: [PATCH 39/50] Throw error in clang-offload-wrapper when zstd is not present but still do --offload-compress; Dynamically detect if zstd is present when running E2E tests. 
--- .../ClangOffloadWrapper.cpp | 11 +++---- sycl/source/detail/compression.hpp | 1 - sycl/test-e2e/CMakeLists.txt | 2 -- sycl/test-e2e/Compression/no_zstd_warning.cpp | 4 +-- sycl/test-e2e/lit.cfg.py | 30 +++++++++++++++++-- sycl/test-e2e/lit.site.cfg.py.in | 2 -- 6 files changed, 35 insertions(+), 15 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index ef3d4fc372da3..07bdf7254a8bc 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1109,12 +1109,13 @@ class BinaryWrapper { Fbin = *FBinOrErr; } else { - // If '--offload-compress' option is specified and zstd is not available - // then warn the user that the image will not be compressed. + // If '--offload-compress' option is specified and zstd is not + // available, throw an error. if (OffloadCompressDevImgs && !llvm::compression::zstd::isAvailable()) { - WithColor::warning(errs(), ToolName) - << "'--offload-compress' option is specified but zstd is not " - "available. The device image will not be compressed.\n"; + createStringError(inconvertibleErrorCode(), + "'--offload-compress' option is specified but zstd " + "is not available. The device image will not be " + "compressed."); } // Don't compress if the user explicitly specifies the binary image diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp index 56894a4de0dd1..4810f446f6b7a 100644 --- a/sycl/source/detail/compression.hpp +++ b/sycl/source/detail/compression.hpp @@ -144,4 +144,3 @@ class ZSTDCompressor { } // namespace sycl #endif // SYCL_RT_ZSTD_NOT_AVAIABLE - diff --git a/sycl/test-e2e/CMakeLists.txt b/sycl/test-e2e/CMakeLists.txt index 6c1ce9208269c..2379d7859e6a5 100644 --- a/sycl/test-e2e/CMakeLists.txt +++ b/sycl/test-e2e/CMakeLists.txt @@ -83,8 +83,6 @@ if(NOT SYCL_TEST_E2E_STANDALONE) ) endif() # Standalone. 
-find_package(zstd) - add_custom_target(check-sycl-e2e COMMAND ${Python3_EXECUTABLE} ${LLVM_LIT} ${SYCL_E2E_TESTS_LIT_FLAGS} . COMMENT "Running SYCL End-to-End tests" diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp index 635ed96acd3dc..bf67e4232de97 100644 --- a/sycl/test-e2e/Compression/no_zstd_warning.cpp +++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp @@ -1,5 +1,5 @@ -// Test to check warnings when using --offload-compress without zstd. +// using --offload-compress without zstd should throw an error. // REQUIRES: !zstd // RUN: %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s -// CHECK: warning: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed. +// XFAIL: * diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 12aba3248c45a..998971ced0ab0 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -308,6 +308,33 @@ def open_check_file(file_name): if sp[0] == 0: config.available_features.add("preview-breaking-changes-supported") +# Check if clang is built with ZSTD and compression support. 
+check_zstd_file = os.path.join(config.sycl_obj_root, "compression_available.cpp") +with open(check_zstd_file, "w") as fp: + print( + textwrap.dedent( + """ + #include + using namespace sycl; + void kernel1(buffer &b, queue q) { + q.submit([&](sycl::handler &cgh) { + auto acc = sycl::accessor(b, cgh); + q.single_task([=] {acc[0] = acc[0] + 1;}); + }); + } + """ + ), + file=fp, + ) + +sp = subprocess.getstatusoutput( + config.dpcpp_compiler + + " -fsycl --offload-compress -shared -fPIC " + + check_zstd_file +) +if sp[0] == 0: + config.available_features.add("zstd") + # Check for CUDA SDK check_cuda_file = "cuda_include.cpp" with open_check_file(check_cuda_file) as fp: @@ -594,9 +621,6 @@ def open_check_file(file_name): if lit_config.params.get("ze_debug"): config.available_features.add("ze_debug") -if config.zstd_found and config.zstd_found == "TRUE": - config.available_features.add("zstd") - if config.run_launcher: config.substitutions.append(("%e2e_tests_root", config.test_source_root)) diff --git a/sycl/test-e2e/lit.site.cfg.py.in b/sycl/test-e2e/lit.site.cfg.py.in index b67d84c3a6146..dfa5003c1a2f4 100644 --- a/sycl/test-e2e/lit.site.cfg.py.in +++ b/sycl/test-e2e/lit.site.cfg.py.in @@ -44,8 +44,6 @@ config.vulkan_include_dir = "@Vulkan_INCLUDE_DIRS@" config.vulkan_lib = "@Vulkan_LIBRARY@" config.vulkan_found = "@Vulkan_FOUND@" -config.zstd_found = "@zstd_FOUND@" - config.run_launcher = lit_config.params.get('run_launcher', "@SYCL_E2E_RUN_LAUNCHER@") config.allow_unknown_arch = "@SYCL_E2E_LIT_ALLOW_UNKNOWN_ARCH@" From 9936bab5c9785e115a39e6ca50448ab36ee0d32a Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sat, 14 Sep 2024 15:53:48 -0700 Subject: [PATCH 40/50] Add debug mode to Findzstd.cmake --- llvm/cmake/modules/Findzstd.cmake | 2 ++ sycl/source/CMakeLists.txt | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/cmake/modules/Findzstd.cmake b/llvm/cmake/modules/Findzstd.cmake index 9877b6ca63215..7e07409c25dd9 100644 ---
a/llvm/cmake/modules/Findzstd.cmake +++ b/llvm/cmake/modules/Findzstd.cmake @@ -10,6 +10,8 @@ # zstd::libzstd_shared # zstd::libzstd_static +set(CMAKE_FIND_DEBUG_MODE 1) + if(MSVC) set(zstd_STATIC_LIBRARY_SUFFIX "_static\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") else() diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index ed6db74fdd697..b5db3f421e19d 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -69,10 +69,6 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() - # TODO: Remove debug print. Need this to figure out why zstd is not - # being found on Windows CI machines. - set(CMAKE_FIND_DEBUG_MODE 1) - if (NOT LLVM_ENABLE_ZSTD) target_compile_definitions(${LIB_OBJ_NAME} PRIVATE SYCL_RT_ZSTD_NOT_AVAIABLE) else() From 969b520a2814dd883a74955ee967e4d7ff659050 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 15 Sep 2024 08:57:37 -0700 Subject: [PATCH 41/50] Fix compression_seperate_compile test failure --- .../test-e2e/Compression/compression_seperate_compile.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/Compression/compression_seperate_compile.cpp b/sycl/test-e2e/Compression/compression_seperate_compile.cpp index 178d1930af31d..ad4bf1fd69355 100644 --- a/sycl/test-e2e/Compression/compression_seperate_compile.cpp +++ b/sycl/test-e2e/Compression/compression_seperate_compile.cpp @@ -4,17 +4,17 @@ // REQUIRES: zstd, opencl-aot, cpu, linux ////////////////////// Compile device images -// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=g++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL1' -DENABLE_KERNEL1 -c %s -o %t_kernel1_aot.o -// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=g++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL2' 
-DENABLE_KERNEL2 -c %s -o %t_kernel2_aot.o +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL1' -DENABLE_KERNEL1 -c %s -o %t_kernel1_aot.o +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL2' -DENABLE_KERNEL2 -c %s -o %t_kernel2_aot.o ////////////////////// Link device images // RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v ////////////////////// Compile the host program -// RUN: g++ -I%T/../../../../../include -I%T/../../../../../include/sycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o +// RUN: %clangxx -fsycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o ////////////////////// Link the host program and compressed device images -// RUN: g++ %t_main.o %t_kernel1_aot.o %t_kernel2_aot.o %t_compressed_image.o -L%T/../../../../../lib -lsycl -o %t_compress.out +// RUN: %clangxx -fsycl %t_main.o %t_kernel1_aot.o %t_kernel2_aot.o %t_compressed_image.o -o %t_compress.out // RUN: %{run} %t_compress.out From 32e4868c82c4cbdd5562420a906b277344ab34c2 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 15 Sep 2024 13:47:38 -0700 Subject: [PATCH 42/50] Fix unreferenced var error on MSVC; Remove debug prints. 
--- llvm/cmake/modules/Findzstd.cmake | 9 +++------ sycl/unittests/compression/CompressionTests.cpp | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/llvm/cmake/modules/Findzstd.cmake b/llvm/cmake/modules/Findzstd.cmake index 7e07409c25dd9..4bc0b793e51c9 100644 --- a/llvm/cmake/modules/Findzstd.cmake +++ b/llvm/cmake/modules/Findzstd.cmake @@ -10,20 +10,17 @@ # zstd::libzstd_shared # zstd::libzstd_static -set(CMAKE_FIND_DEBUG_MODE 1) - if(MSVC) set(zstd_STATIC_LIBRARY_SUFFIX "_static\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") else() set(zstd_STATIC_LIBRARY_SUFFIX "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") endif() -find_path(zstd_INCLUDE_DIR NAMES zstd.h HINTS $ENV{ZSTD_ROOT}/include) -find_library(zstd_LIBRARY NAMES zstd zstd_static HINTS $ENV{ZSTD_ROOT}/lib) +find_path(zstd_INCLUDE_DIR NAMES zstd.h) +find_library(zstd_LIBRARY NAMES zstd zstd_static) find_library(zstd_STATIC_LIBRARY NAMES zstd_static - "${CMAKE_STATIC_LIBRARY_PREFIX}zstd${CMAKE_STATIC_LIBRARY_SUFFIX}" - HINTS $ENV{ZSTD_ROOT}/lib) + "${CMAKE_STATIC_LIBRARY_PREFIX}zstd${CMAKE_STATIC_LIBRARY_SUFFIX}") include(FindPackageHandleStandardArgs) find_package_handle_standard_args( diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp index 7f91c9ad31b66..e9b50fa1cc2e0 100644 --- a/sycl/unittests/compression/CompressionTests.cpp +++ b/sycl/unittests/compression/CompressionTests.cpp @@ -50,7 +50,7 @@ TEST(CompressionTest, NegativeErrorTest) { try { auto compressedData = ZSTDCompressor::DecompressBlob( input.c_str(), input.size(), decompressedSize); - } catch (sycl::exception &e) { + } catch (...) 
{ threwException = true; } From fd0f1e334766219575d7569fa0102fd413440e6c Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 16 Sep 2024 23:59:34 -0700 Subject: [PATCH 43/50] Simplify E2E test and fix failure on CUDA --- .../Compression/compression_multiple_tu.cpp | 46 +++++++------------ 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp index 27b71e47cfdef..dbc60a6447e7e 100644 --- a/sycl/test-e2e/Compression/compression_multiple_tu.cpp +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -2,63 +2,51 @@ // translation units, one compressed and one not compressed. // REQUIRES: zstd -// RUN: %clangxx --offload-compress -DENABLE_KERNEL1 -fsycl -O0 -shared -fPIC %s -o %t_kernel1.so -// RUN: %clangxx -DENABLE_KERNEL2 -fsycl -O0 -shared -fPIC %s -o %t_kernel2.so +// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared -fPIC -o %t_kernel1.so +// RUN: %{build} -DENABLE_KERNEL2 -shared -fPIC -o %t_kernel2.so -// RUN: %clangxx -fsycl %t_kernel1.so %t_kernel2.so %s -Wl,-rpath=%T -o %t_compress.out +// RUN: %{build} %t_kernel1.so %t_kernel2.so -Wl,-rpath=%T -o %t_compress.out // RUN: %{run} %t_compress.out #if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2) #include using namespace sycl; - -class TestFnObj { -public: - TestFnObj(buffer &buf, handler &cgh) - : data(buf.get_access(cgh)) {} - accessor data; - void operator()(id<1> item) const { data[item] = item[0]; } -}; #endif -void kernel1(); -void kernel2(); - #ifdef ENABLE_KERNEL1 void kernel1() { - static int data[10]; + int data = -1; { - buffer b(data, range<1>(10)); + buffer b(&data, range(1)); queue q; q.submit([&](sycl::handler &cgh) { - TestFnObj kernel(b, cgh); - cgh.parallel_for(range<1>(10), kernel); + auto acc = accessor(b, cgh); + cgh.single_task([=]{ acc[0] = __builtin_abs(acc[0]);}); }); } - for (int i = 0; i < 10; i++) { - assert(data[i] == i); - } + assert(data ==
1); } #endif #ifdef ENABLE_KERNEL2 void kernel2() { - static int data[256]; + int data = -2; { - buffer b(data, range<1>(256)); + buffer b(&data, range(1)); queue q; - q.submit([&](handler &cgh) { - TestFnObj kernel(b, cgh); - cgh.parallel_for(range<1>(256), kernel); + q.submit([&](sycl::handler &cgh) { + auto acc = accessor(b, cgh); + cgh.single_task([=]{ acc[0] = __builtin_abs(acc[0]);}); }); } - for (int i = 0; i < 256; i++) { - assert(data[i] == i); - } + assert(data == 2); } #endif #if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2) +void kernel1(); +void kernel2(); + int main() { kernel1(); kernel2(); From eb7588cb13694edc20d657e3a6e458fdaec32fbc Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 17 Sep 2024 12:54:24 -0700 Subject: [PATCH 44/50] Address reviews --- sycl/source/detail/compression.hpp | 1 + sycl/source/detail/device_binary_image.cpp | 3 ++- sycl/test-e2e/Compression/compression_multiple_tu.cpp | 6 +++--- ...eperate_compile.cpp => compression_separate_compile.cpp} | 6 +++--- 4 files changed, 9 insertions(+), 7 deletions(-) rename sycl/test-e2e/Compression/{compression_seperate_compile.cpp => compression_separate_compile.cpp} (92%) diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp index 4810f446f6b7a..9d9acbfce3739 100644 --- a/sycl/source/detail/compression.hpp +++ b/sycl/source/detail/compression.hpp @@ -39,6 +39,7 @@ class ZSTDCompressor { // Public APIs public: // Blob (de)compression do not assume format/structure of the input buffer. + // This function can be used in future for compression in on-disk cache. 
static std::unique_ptr CompressBlob(const char *src, size_t srcSize, size_t &dstSize, int level) { auto &instance = GetSingletonInstance(); diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index e0815f16c9a2f..6ba30c9b256f9 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -244,7 +244,8 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( compressedDataSize, DecompressedSize); Bin = new sycl_device_binary_struct(*CompressedBin); - Bin->BinaryStart = (const unsigned char *)(m_DecompressedData.get()); + Bin->BinaryStart = + reinterpret_cast(m_DecompressedData.get()); Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; // Set the new format to none and let RT determine the format. diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp index dbc60a6447e7e..b5202ead11d3e 100644 --- a/sycl/test-e2e/Compression/compression_multiple_tu.cpp +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -8,8 +8,8 @@ // RUN: %{build} %t_kernel1.so %t_kernel2.so -Wl,-rpath=%T -o %t_compress.out // RUN: %{run} %t_compress.out #if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2) +#include #include - using namespace sycl; #endif @@ -21,7 +21,7 @@ void kernel1() { queue q; q.submit([&](sycl::handler &cgh) { auto acc = accessor(b, cgh); - cgh.single_task([=]{ acc[0] = __builtin_abs(acc[0]);}); + cgh.single_task([=] { acc[0] = abs(acc[0]); }); }); } assert(data == 1); @@ -36,7 +36,7 @@ void kernel2() { queue q; q.submit([&](sycl::handler &cgh) { auto acc = accessor(b, cgh); - cgh.single_task([=]{ acc[0] = __builtin_abs(acc[0]);}); + cgh.single_task([=] { acc[0] = abs(acc[0]); }); }); } assert(data == 2); diff --git a/sycl/test-e2e/Compression/compression_seperate_compile.cpp b/sycl/test-e2e/Compression/compression_separate_compile.cpp similarity index 92% rename from 
sycl/test-e2e/Compression/compression_seperate_compile.cpp rename to sycl/test-e2e/Compression/compression_separate_compile.cpp index ad4bf1fd69355..9e47bbebdc875 100644 --- a/sycl/test-e2e/Compression/compression_seperate_compile.cpp +++ b/sycl/test-e2e/Compression/compression_separate_compile.cpp @@ -26,7 +26,7 @@ using namespace sycl; #ifdef ENABLE_KERNEL1 class test_kernel1; void run_kernel1(int *a, queue q) { - q.single_task([=]() { *a *= 3; }); + q.single_task([=]() { *a *= 3; }).wait(); } #endif @@ -34,7 +34,7 @@ void run_kernel1(int *a, queue q) { #ifdef ENABLE_KERNEL2 class test_kernel2; void run_kernel2(int *a, queue q) { - q.single_task([=]() { *a += 42; }); + q.single_task([=]() { *a += 42; }).wait(); } #endif @@ -50,7 +50,7 @@ void run_kernel1(int *a, queue q); void run_kernel2(int *a, queue q); int main() { int retCode = 0; - queue q(cpu_selector_v, {property::queue::in_order()}); + queue q; if (!q.get_device().get_info()) return 0; From 58f9939c2dffefab063c13e2caa34b93caa9b39e Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 17 Sep 2024 17:51:31 -0700 Subject: [PATCH 45/50] Add clang driver test. Address reviews. --- .../Driver/sycl-offload-wrapper-compression.cpp | 16 ++++++++++++++++ sycl/test-e2e/Compression/no_zstd_warning.cpp | 5 ++--- 2 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 clang/test/Driver/sycl-offload-wrapper-compression.cpp diff --git a/clang/test/Driver/sycl-offload-wrapper-compression.cpp b/clang/test/Driver/sycl-offload-wrapper-compression.cpp new file mode 100644 index 0000000000000..f9b1907feba22 --- /dev/null +++ b/clang/test/Driver/sycl-offload-wrapper-compression.cpp @@ -0,0 +1,16 @@ +/// +/// Check if '--offload-compress' and '--offload-compression-level' CLI +/// options are passed to the clang-offload-wrapper. 
+/// + +// REQUIRES: zstd + +// RUN: %clangxx -### -fsycl --offload-compress --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS +// CHECK-COMPRESS: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}}"-offload-compression-level=3"{{.*}} + +// Make sure that the compression options are not passed when --offload-compress is not set. +// RUN: %clangxx -### -fsycl %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS +// RUN: %clangxx -### -fsycl --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS + +// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}} +// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compression-level=3"{{.*}} diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp index bf67e4232de97..8a4460f9b8643 100644 --- a/sycl/test-e2e/Compression/no_zstd_warning.cpp +++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp @@ -1,5 +1,4 @@ // using --offload-compress without zstd should throw an error. // REQUIRES: !zstd -// RUN: %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s - -// XFAIL: * +// RUN: not %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s +// CHECK: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed. 
From 575efc6d2dce3afabfddeed870765156722aa926 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 18 Sep 2024 09:38:43 -0700 Subject: [PATCH 46/50] Fix detection of zstd LIT feature on Windows --- sycl/test-e2e/lit.cfg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index a93ad9b08f85b..59b4943081869 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -327,9 +327,11 @@ def open_check_file(file_name): file=fp, ) +add_fPIC = "-fPIC " if platform.system() != "Windows" else " " + sp = subprocess.getstatusoutput( config.dpcpp_compiler + - " -fsycl --offload-compress -shared -fPIC " + + " -fsycl --offload-compress -shared " + add_fPIC + check_zstd_file ) if sp[0] == 0: From 970ad3563efe05d7c65bf0b54050de63de645be1 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 18 Sep 2024 16:44:56 -0700 Subject: [PATCH 47/50] Simplify zstd detection in LIT; Remove ZSTD requirement in clang driver test. --- .../sycl-offload-wrapper-compression.cpp | 2 -- .../Compression/compression_multiple_tu.cpp | 5 +-- sycl/test-e2e/lit.cfg.py | 33 +++++-------------- 3 files changed, 11 insertions(+), 29 deletions(-) diff --git a/clang/test/Driver/sycl-offload-wrapper-compression.cpp b/clang/test/Driver/sycl-offload-wrapper-compression.cpp index f9b1907feba22..1ef9282ee3598 100644 --- a/clang/test/Driver/sycl-offload-wrapper-compression.cpp +++ b/clang/test/Driver/sycl-offload-wrapper-compression.cpp @@ -3,8 +3,6 @@ /// options are passed to the clang-offload-wrapper. 
/// -// REQUIRES: zstd - // RUN: %clangxx -### -fsycl --offload-compress --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS // CHECK-COMPRESS: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}}"-offload-compression-level=3"{{.*}} diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp index b5202ead11d3e..8f21a9a9796a2 100644 --- a/sycl/test-e2e/Compression/compression_multiple_tu.cpp +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -2,8 +2,9 @@ // translation units, one compressed and one not compressed. // REQUIRES: zstd -// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared -fPIC -o %t_kernel1.so -// RUN: %{build} -DENABLE_KERNEL2 -shared -fPIC -o %t_kernel2.so +// DEFINE: %{fPIC_flag} = %if windows %{%} %else %{-fPIC%} +// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared %{fPIC_flag} -o %t_kernel1.so +// RUN: %{build} -DENABLE_KERNEL2 -shared %{fPIC_flag} -o %t_kernel2.so // RUN: %{build} %t_kernel1.so %t_kernel2.so -Wl,-rpath=%T -o %t_compress.out // RUN: %{run} %t_compress.out diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 59b4943081869..4675206d90dd7 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -309,32 +309,15 @@ def open_check_file(file_name): config.available_features.add("preview-breaking-changes-supported") # Check if clang is built with ZSTD and compression support. 
-check_zstd_file = os.path.join(config.sycl_obj_root, "compression_available.cpp") -with open(check_zstd_file, "w") as fp: - print( - textwrap.dedent( - """ - #include - using namespace sycl; - void kernel1(buffer &b, queue q) { - q.submit([&](sycl::handler &cgh) { - auto acc = sycl::accessor(b, cgh); - q.single_task([=] {acc[0] = acc[0] + 1;}); - }); - } - """ - ), - file=fp, - ) - -add_fPIC = "-fPIC " if platform.system() != "Windows" else " " - -sp = subprocess.getstatusoutput( - config.dpcpp_compiler + - " -fsycl --offload-compress -shared " + add_fPIC + - check_zstd_file +fPIC_opt = "-fPIC" if platform.system() != "Windows" else "" +ps = subprocess.Popen( + [config.dpcpp_compiler, "-fsycl", "--offload-compress", "-shared", fPIC_opt, "-x", "c++", "-", "-o", "-"], + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, ) -if sp[0] == 0: +op = ps.communicate(input=b"") +if ps.wait() == 0: config.available_features.add("zstd") # Check for CUDA SDK From c1a2c131168ef5f0b06f2c21f29bf4e80c78b160 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 23 Sep 2024 19:53:45 -0700 Subject: [PATCH 48/50] Delay image decompression till it is actually used. 
--- .../ClangOffloadWrapper.cpp | 9 ++--- sycl/source/detail/compression.hpp | 19 +++++++---- sycl/source/detail/device_binary_image.cpp | 34 ++++++++++++------- sycl/source/detail/device_binary_image.hpp | 19 +++++++++-- .../program_manager/program_manager.cpp | 21 ++++++++++++ .../Compression/compression_multiple_tu.cpp | 6 ++-- 6 files changed, 79 insertions(+), 29 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 07bdf7254a8bc..0c10714d2d449 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1112,10 +1112,11 @@ class BinaryWrapper { // If '--offload-compress' option is specified and zstd is not // available, throw an error. if (OffloadCompressDevImgs && !llvm::compression::zstd::isAvailable()) { - createStringError(inconvertibleErrorCode(), - "'--offload-compress' option is specified but zstd " - "is not available. The device image will not be " - "compressed."); + return createStringError( + inconvertibleErrorCode(), + "'--offload-compress' option is specified but zstd " + "is not available. 
The device image will not be " + "compressed."); } // Don't compress if the user explicitly specifies the binary image diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp index 9d9acbfce3739..2b19a39f9b7c3 100644 --- a/sycl/source/detail/compression.hpp +++ b/sycl/source/detail/compression.hpp @@ -81,6 +81,17 @@ class ZSTDCompressor { return dstBuffer; } + static size_t GetDecompressedSize(const char *src, size_t srcSize) { + size_t dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); + + if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || + dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Error determining size of uncompressed data."); + } + return dstBufferSize; + } + static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, size_t &dstSize) { auto &instance = GetSingletonInstance(); @@ -101,13 +112,7 @@ class ZSTDCompressor { // Size of decompressed image can be larger than what we can allocate // on heap. In that case, we need to use streaming decompression. - auto dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); - - if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || - dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Error determining size of uncompressed data."); - } + auto dstBufferSize = GetDecompressedSize(src, srcSize); // Allocate buffer for decompressed data. 
auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 6ba30c9b256f9..2be48d4a38fce 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -170,6 +170,8 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { // it when invoking the offload wrapper job Format = static_cast(Bin->Format); + // For compressed images, we delay determining the format until the image is + // decompressed. if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) // try to determine the format; may remain "NONE" Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); @@ -186,7 +188,6 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { ProgramMetadataUR.push_back( ur::mapDeviceBinaryPropertyToProgramMetadata(Prop)); } - ExportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); ImportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS); DeviceGlobals.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS); @@ -235,25 +236,34 @@ CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) : RTDeviceBinaryImage() { - size_t compressedDataSize = static_cast(CompressedBin->BinaryEnd - - CompressedBin->BinaryStart); + // 'CompressedBin' is part of the executable image loaded into memory + // which can't be modified easily. So, we need to make a copy of it. + Bin = new sycl_device_binary_struct(*CompressedBin); + + // Get the decompressed size of the binary image. 
+ m_ImageSize = ZSTDCompressor::GetDecompressedSize( + reinterpret_cast(Bin->BinaryStart), + static_cast(Bin->BinaryEnd - Bin->BinaryStart)); + + init(Bin); +} + +void CompressedRTDeviceBinaryImage::Decompress() { + + size_t CompressedDataSize = + static_cast(Bin->BinaryEnd - Bin->BinaryStart); size_t DecompressedSize = 0; m_DecompressedData = ZSTDCompressor::DecompressBlob( - reinterpret_cast(CompressedBin->BinaryStart), - compressedDataSize, DecompressedSize); + reinterpret_cast(Bin->BinaryStart), CompressedDataSize, + DecompressedSize); - Bin = new sycl_device_binary_struct(*CompressedBin); Bin->BinaryStart = reinterpret_cast(m_DecompressedData.get()); Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; - // Set the new format to none and let RT determine the format. - // TODO: Add support for automatically detecting compressed - // binary format. - Bin->Format = SYCL_DEVICE_BINARY_TYPE_NONE; - - init(Bin); + Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); + Format = static_cast(Bin->Format); } CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 62dc0afce90fd..203427b89ca45 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -158,7 +158,10 @@ class RTDeviceBinaryImage { virtual void print() const; virtual void dump(std::ostream &Out) const; - size_t getSize() const { + // getSize will be overridden in the case of compressed binary images. + // In that case, we return the size of uncompressed data, instead of + // BinaryEnd - BinaryStart. + virtual size_t getSize() const { assert(Bin && "binary image data not set"); return static_cast(Bin->BinaryEnd - Bin->BinaryStart); } @@ -277,14 +280,23 @@ class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { }; #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE -// Compressed device binary image. 
It decompresses the binary image on -// construction and stores the decompressed data as RTDeviceBinaryImage. +// Compressed device binary image. Decompression happens when the image is +// actually used to build a program. // Also, frees the decompressed data in destructor. class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { public: CompressedRTDeviceBinaryImage(sycl_device_binary Bin); ~CompressedRTDeviceBinaryImage() override; + void Decompress(); + + // We return the size of decompressed data, not the size of compressed data. + size_t getSize() const override { + assert(Bin && "binary image data not set"); + return m_ImageSize; + } + + bool IsCompressed() const { return m_DecompressedData.get() == nullptr; } void print() const override { RTDeviceBinaryImage::print(); std::cerr << " COMPRESSED\n"; @@ -292,6 +304,7 @@ class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { private: std::unique_ptr m_DecompressedData; + size_t m_ImageSize; }; #endif // SYCL_RT_ZSTD_NOT_AVAIABLE diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index e94eabfa86eac..ea8ac9fa5273f 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -733,6 +733,12 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } } +static inline void CheckAndDecompressImage(RTDeviceBinaryImage *Img) { + if (auto CompImg = dynamic_cast(Img)) + if (CompImg->IsCompressed()) + CompImg->Decompress(); +} + // When caching is enabled, the returned UrProgram will already have // its ref count incremented. 
ur_program_handle_t ProgramManager::getBuiltURProgram( @@ -785,6 +791,10 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( collectDeviceImageDepsForImportedSymbols(Img, Device); DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); + // Decompress all DeviceImagesToLink + for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) + CheckAndDecompressImage(BinImg); + std::vector AllImages; AllImages.reserve(ImageDeps.size() + 1); AllImages.push_back(&Img); @@ -1388,6 +1398,10 @@ ProgramManager::getDeviceImage(const std::string &KernelName, Device); } } + + // Decompress the image if it is compressed. + CheckAndDecompressImage(Img); + if (Img) { CheckJITCompilationForImage(Img, JITCompilationIsRequired); @@ -1714,6 +1728,10 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { [&](auto &CurrentImg) { return CurrentImg.first->getFormat() == Img->getFormat(); }); + + // Check if image is compressed, and decompress it before dumping. + CheckAndDecompressImage(Img.get()); + dumpImage(*Img, NeedsSequenceID ? 
++SequenceID : 0); } @@ -2191,6 +2209,9 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( auto &[KernelImagesState, KernelImages] = *StateImagesPair; + // Check if device image is compressed and decompress it if needed + CheckAndDecompressImage(BinImage); + if (KernelImages.empty()) { KernelImagesState = ImgState; KernelImages.push_back(BinImage); diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp index 8f21a9a9796a2..c72dba76054a6 100644 --- a/sycl/test-e2e/Compression/compression_multiple_tu.cpp +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -3,10 +3,10 @@ // REQUIRES: zstd // DEFINE: %{fPIC_flag} = %if windows %{%} %else %{-fPIC%} -// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared %{fPIC_flag} -o %t_kernel1.so -// RUN: %{build} -DENABLE_KERNEL2 -shared %{fPIC_flag} -o %t_kernel2.so +// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared %{fPIC_flag} -o %T/kernel1.so +// RUN: %{build} -DENABLE_KERNEL2 -shared %{fPIC_flag} -o %T/kernel2.so -// RUN: %{build} %t_kernel1.so %t_kernel2.so -Wl,-rpath=%T -o %t_compress.out +// RUN: %{build} %t_kernel1.so %t_kernel2.so -o %t_compress.out // RUN: %{run} %t_compress.out #if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2) #include From 966e3ddc1ee48b89faa32b7c71037d598a46fd6e Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 23 Sep 2024 23:16:21 -0700 Subject: [PATCH 49/50] Fix E2E test failure in compression_multiple_tu --- sycl/test-e2e/Compression/compression_multiple_tu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp index c72dba76054a6..ce230d19c97b7 100644 --- a/sycl/test-e2e/Compression/compression_multiple_tu.cpp +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -6,7 +6,7 @@ // RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared %{fPIC_flag} -o 
%T/kernel1.so // RUN: %{build} -DENABLE_KERNEL2 -shared %{fPIC_flag} -o %T/kernel2.so -// RUN: %{build} %t_kernel1.so %t_kernel2.so -o %t_compress.out +// RUN: %{build} %T/kernel1.so %T/kernel2.so -o %t_compress.out // RUN: %{run} %t_compress.out #if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2) #include From 946a738d5b7730e5af1cdf56d4c0ceaa176991da Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 25 Sep 2024 13:18:34 -0700 Subject: [PATCH 50/50] Address reviews --- .../ClangOffloadWrapper.cpp | 21 +++++++++++++------ .../program_manager/program_manager.cpp | 2 ++ .../Compression/compression_multiple_tu.cpp | 7 +++---- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 0c10714d2d449..66853d5daefa2 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1134,12 +1134,21 @@ class BinaryWrapper { // Compress the image using zstd. 
SmallVector CompressedBuffer; - llvm::compression::zstd::compress( - ArrayRef( - (const unsigned char *)(Bin->getBufferStart()), - Bin->getBufferSize()), - CompressedBuffer, OffloadCompressLevel); - +#if LLVM_ENABLE_EXCEPTIONS + try { +#endif + llvm::compression::zstd::compress( + ArrayRef( + (const unsigned char *)(Bin->getBufferStart()), + Bin->getBufferSize()), + CompressedBuffer, OffloadCompressLevel); +#if LLVM_ENABLE_EXCEPTIONS + } catch (const std::exception &ex) { + return createStringError(inconvertibleErrorCode(), + std::string("Failed to compress the device image: \n") + + std::string(ex.what())); + } +#endif if (Verbose) errs() << "[Compression] Original image size: " << Bin->getBufferSize() << "\n" diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index ea8ac9fa5273f..4865f7c9ed98f 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -734,9 +734,11 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } static inline void CheckAndDecompressImage(RTDeviceBinaryImage *Img) { +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) CompImg->Decompress(); +#endif } // When caching is enabled, the returned UrProgram will already have diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp index ce230d19c97b7..72eb3f0904790 100644 --- a/sycl/test-e2e/Compression/compression_multiple_tu.cpp +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -1,10 +1,9 @@ // End-to-End test for testing device image compression when we have two // translation units, one compressed and one not compressed. 
-// REQUIRES: zstd +// REQUIRES: zstd, linux -// DEFINE: %{fPIC_flag} = %if windows %{%} %else %{-fPIC%} -// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared %{fPIC_flag} -o %T/kernel1.so -// RUN: %{build} -DENABLE_KERNEL2 -shared %{fPIC_flag} -o %T/kernel2.so +// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared -fPIC -o %T/kernel1.so +// RUN: %{build} -DENABLE_KERNEL2 -shared -fPIC -o %T/kernel2.so // RUN: %{build} %T/kernel1.so %T/kernel2.so -o %t_compress.out // RUN: %{run} %t_compress.out