Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

change windows build system #16980

Merged
merged 6 commits into from
Jan 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 80 additions & 22 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
# Optional build features. Each option() below defaults to OFF unless the user
# sets it on the command line or in the cache.
option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
option(BUILD_CYTHON_MODULES "Build cython modules." OFF)
# USE_SPLIT_ARCH_DLL is offered to the user (default ON) only while the "MSVC"
# condition holds; when it does not, the option is forced to OFF.
cmake_dependent_option(USE_SPLIT_ARCH_DLL "Build a separate DLL for each Cuda arch (Windows only)." ON "MSVC" OFF)


message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}")
message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")
Expand Down Expand Up @@ -100,6 +102,7 @@ endif()

# Determine SYSTEM_ARCHITECTURE for the rest of the build.
if(MSVC)
# MSVC builds do not probe the host: the architecture is fixed to x86_64.
set(SYSTEM_ARCHITECTURE x86_64)
# Enable the Microsoft Macro Assembler so that .asm sources can be compiled.
enable_language(ASM_MASM)
else()
# Elsewhere, take the output of `uname -m` and strip the trailing newline with `tr`.
execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE SYSTEM_ARCHITECTURE)
endif()
Expand Down Expand Up @@ -593,8 +596,16 @@ if(USE_CUDA)
include(${CMAKE_ROOT}/Modules/FindCUDA/select_compute_arch.cmake)
CUDA_SELECT_NVCC_ARCH_FLAGS(CUDA_ARCH_FLAGS ${MXNET_CUDA_ARCH})
message("-- CUDA: Using the following NVCC architecture flags ${CUDA_ARCH_FLAGS}")
# Build arch_code_list: the numeric architecture code (the digits following
# "sm_") of every entry in CUDA_ARCH_FLAGS that names a real GPU architecture.
# Start from an unset/empty list so repeated configuration does not accumulate.
set(arch_code_list)
foreach(arch_str ${CUDA_ARCH_FLAGS})
# Entries without an "sm_<digits>" component are skipped.
if((arch_str MATCHES ".*sm_[0-9]+"))
# Keep only the captured digits, dropping everything up to and including "sm_".
string( REGEX REPLACE ".*sm_([0-9]+)" "\\1" arch_code ${arch_str} )
list(APPEND arch_code_list ${arch_code})
endif()
endforeach()

string(REPLACE ";" " " CUDA_ARCH_FLAGS_SPACES "${CUDA_ARCH_FLAGS}")
string(APPEND CMAKE_CUDA_FLAGS " ${CUDA_ARCH_FLAGS_SPACES}")


find_package(CUDAToolkit REQUIRED cublas cufft cusolver curand
OPTIONAL_COMPONENTS nvToolsExt nvrtc)
Expand Down Expand Up @@ -667,6 +678,7 @@ add_library(sample_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib
target_include_directories(sample_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
set(MXNET_INSTALL_TARGETS mxnet)
if(UNIX)
string(APPEND CMAKE_CUDA_FLAGS "${CUDA_ARCH_FLAGS_SPACES}")
# Create dummy file since we want an empty shared library before linking
set(DUMMY_SOURCE ${CMAKE_BINARY_DIR}/dummy.c)
file(WRITE ${DUMMY_SOURCE} "")
Expand All @@ -678,30 +690,66 @@ if(UNIX)
target_link_libraries(mxnet_static PUBLIC ${CMAKE_DL_LIBS})
target_compile_options(sample_lib PUBLIC -shared)
set_target_properties(mxnet_static PROPERTIES OUTPUT_NAME mxnet)
else()
add_library(mxnet SHARED ${SOURCE})
elseif(MSVC)
target_compile_options(sample_lib PUBLIC /LD)
set_target_properties(sample_lib PROPERTIES PREFIX "lib")
endif()

if(USE_CUDA AND MSVC)
target_compile_options(mxnet PUBLIC "$<$<CONFIG:DEBUG>:-Xcompiler=-MTd -Gy>")
target_compile_options(mxnet PUBLIC "$<$<CONFIG:RELEASE>:-Xcompiler=-MT -Gy>")
if(USE_CUDA)
if(MSVC)
if(USE_SPLIT_ARCH_DLL)
add_executable(gen_warp tools/windowsbuild/gen_warp.cpp)
add_library(mxnet SHARED tools/windowsbuild/warp_dll.cpp ${CMAKE_BINARY_DIR}/warp_gen_cpp.cpp
${CMAKE_BINARY_DIR}/warp_gen.asm)
target_link_libraries(mxnet PRIVATE cudart Shlwapi)
list(GET arch_code_list 0 mxnet_first_arch)
foreach(arch ${arch_code_list})
add_library(mxnet_${arch} SHARED ${SOURCE})
target_compile_options(
mxnet_${arch}
PRIVATE
"$<$<COMPILE_LANGUAGE:CUDA>:--gpu-architecture=compute_${arch}>"
)
target_compile_options(
mxnet_${arch}
PRIVATE
"$<$<COMPILE_LANGUAGE:CUDA>:--gpu-code=sm_${arch},compute_${arch}>"
)
target_compile_options(
mxnet_${arch}
PRIVATE "$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MTd -Gy /bigobj>")
target_compile_options(
mxnet_${arch}
PRIVATE "$<$<AND:$<CONFIG:RELEASE>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MT -Gy /bigobj>")
endforeach()

# Generate the forwarding sources compiled into the wrapper `mxnet` library by
# running gen_warp on the first per-architecture DLL.
#
# gen_warp writes warp_gen_cpp.cpp and warp_gen.asm into its current directory,
# so the command runs with the binary directory as its working directory.
#
# gen_warp is listed in DEPENDS as well as in COMMAND: naming an executable
# target in COMMAND only orders the build, whereas DEPENDS adds the file-level
# dependency that re-runs the generator when the tool itself is rebuilt.
add_custom_command(
  OUTPUT
    ${CMAKE_BINARY_DIR}/warp_gen_cpp.cpp
    ${CMAKE_BINARY_DIR}/warp_gen.asm
  COMMAND gen_warp $<TARGET_FILE:mxnet_${mxnet_first_arch}>
  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/
  DEPENDS
    gen_warp
    $<TARGET_FILE:mxnet_${mxnet_first_arch}>
  VERBATIM)
else(USE_SPLIT_ARCH_DLL)
string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
set(CMAKE_CUDA_FLAGS "${CUDA_ARCH_FLAGS_SPACES}")
add_library(mxnet SHARED ${SOURCE})
target_compile_options(
mxnet
PRIVATE "$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MTd -Gy /bigobj>")
target_compile_options(
mxnet
PRIVATE "$<$<AND:$<CONFIG:RELEASE>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MT -Gy /bigobj>")

endif(USE_SPLIT_ARCH_DLL)
else()
add_library(mxnet SHARED ${SOURCE})
endif()
else()
add_library(mxnet SHARED ${SOURCE})
endif()

endif()

if(USE_DIST_KVSTORE)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/ps-lite/CMakeLists.txt)
add_subdirectory("3rdparty/ps-lite")
list(APPEND pslite_LINKER_LIBS pslite protobuf)
target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG})
target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE})
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_DEBUG})
else()
list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_RELEASE})
endif()
target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG})
target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE})

else()
set(pslite_LINKER_LIBS protobuf zmq-static)
endif()
Expand Down Expand Up @@ -735,13 +783,24 @@ if(USE_TVM_OP)
)
endif()

target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})

if(USE_PLUGINS_WARPCTC)
target_link_libraries(mxnet PUBLIC debug ${WARPCTC_LIB_DEBUG})
target_link_libraries(mxnet PUBLIC optimized ${WARPCTC_LIB_RELEASE})
list(APPEND mxnet_LINKER_LIBS ${WARPCTC_LIB})
endif()

# Attach the accumulated link dependencies (plus dmlc) to whichever targets are
# built from the main sources: the per-architecture libraries in the MSVC
# split-arch CUDA configuration, the single `mxnet` target in every other case.
if(MSVC AND USE_SPLIT_ARCH_DLL AND USE_CUDA)
  foreach(arch ${arch_code_list})
    target_link_libraries(mxnet_${arch} PUBLIC ${mxnet_LINKER_LIBS})
    target_link_libraries(mxnet_${arch} PUBLIC dmlc)
  endforeach()
else()
  target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})
  target_link_libraries(mxnet PUBLIC dmlc)
endif()

if(USE_OPENCV AND OpenCV_VERSION_MAJOR GREATER 2)
add_executable(im2rec "tools/im2rec.cc")
Expand All @@ -761,7 +820,6 @@ else()
is required for im2rec, im2rec will not be available")
endif()

target_link_libraries(mxnet PUBLIC dmlc)

if(MSVC AND USE_MXNET_LIB_NAMING)
set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet")
Expand Down
19 changes: 19 additions & 0 deletions tools/windowsbuild/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->

<!--- http://www.apache.org/licenses/LICENSE-2.0 -->

<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->

Because of the DLL size limitation on Windows, the MXNet library is split into separate DLLs, one per CUDA architecture.
Reference https://github.com/apache/incubator-mxnet/pull/16980
209 changes: 209 additions & 0 deletions tools/windowsbuild/gen_warp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <io.h>
#include <Windows.h>

#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#define IMAGE_SIZEOF_SIGNATURE 4


// Converts a relative virtual address (RVA) inside a PE image into the
// corresponding offset within the on-disk file.
//
//   RVA            - address to translate.
//   section_header - first entry of the image's section table.
//
// The section table is scanned in order for the first section whose address
// range ends beyond RVA; the result is RVA rebased from that section's virtual
// address onto its raw-data file position.
//
// The number of sections is not known here, so the scan cannot stop on its
// own: the caller must pass an RVA that lies inside one of the sections.
DWORD rva_to_foa(IN DWORD RVA, IN PIMAGE_SECTION_HEADER section_header)
{
    // A section covers [VirtualAddress, VirtualAddress + VirtualSize); the end
    // is exclusive, so an RVA equal to it belongs to a later section.
    while (RVA >= section_header->VirtualAddress + section_header->Misc.VirtualSize)
    {
        ++section_header;
    }

    return RVA - section_header->VirtualAddress + section_header->PointerToRawData;
}

// Builds a std::string from a printf-style format string and its arguments.
//
//   format - printf-style format string.
//   ...    - arguments consumed by the conversions in `format`.
//
// Returns the rendered text, or an empty string if the format cannot be
// rendered (vsnprintf reports an error) or renders to nothing.
//
// The argument list is walked twice, once to measure and once to render. A
// va_list must not be reused after a v*printf call, so the measuring pass works
// on a va_copy. Both lists are released with va_end before returning.
std::string format(const char* format, ...)
{
    va_list args;
    va_start(args, format);

    // Pass 1: ask vsnprintf how many characters the output needs.
    va_list measure_args;
    va_copy(measure_args, args);
    const int length = std::vsnprintf(nullptr, 0, format, measure_args);
    va_end(measure_args);

    std::string result;
    if (length > 0)
    {
        // Pass 2: render into a buffer with room for the terminating '\0',
        // then copy only the text itself into the result.
        const size_t buffer_size = static_cast<size_t>(length) + 1;
        std::unique_ptr<char[]> buf(new char[buffer_size]);
        std::vsnprintf(buf.get(), buffer_size, format, args);
        result.assign(buf.get(), static_cast<size_t>(length));
    }

    va_end(args);
    return result;
}

int main(int argc, char* argv[])
{

if (argc != 2)
{
return 0;
}

//open file
const HANDLE h_file = CreateFile(
argv[1],
GENERIC_READ ,
FILE_SHARE_READ ,
nullptr,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
nullptr);


DWORD size_high;
const DWORD size_low = GetFileSize(h_file, &size_high);

uint64_t dll_size = ((uint64_t(size_high)) << 32) + (uint64_t)size_low;

// Create File Mapping
const HANDLE h_map_file = CreateFileMapping(
h_file,
nullptr,
PAGE_READONLY,
size_high,
size_low,
nullptr);
if (h_map_file == INVALID_HANDLE_VALUE || h_map_file == nullptr)
{
std::cout << "error";
CloseHandle(h_file);
return 0;
}

//Map File to memory
void* pv_file = MapViewOfFile(
h_map_file,
FILE_MAP_READ,
0,
0,
0);

if (pv_file == nullptr)
{
std::cout << "error";
CloseHandle(h_file);
return 0;
}

uint8_t* p = static_cast<uint8_t*>(pv_file);


PIMAGE_DOS_HEADER dos_header = reinterpret_cast<PIMAGE_DOS_HEADER>(p);

const PIMAGE_NT_HEADERS nt_headers = reinterpret_cast<const PIMAGE_NT_HEADERS>(p + dos_header->e_lfanew);

const PIMAGE_FILE_HEADER file_header = &nt_headers->FileHeader;

PIMAGE_OPTIONAL_HEADER optional_header = (PIMAGE_OPTIONAL_HEADER)(&nt_headers->OptionalHeader);

const DWORD file_alignment = optional_header->FileAlignment;


PIMAGE_SECTION_HEADER section_table =
reinterpret_cast<PIMAGE_SECTION_HEADER>(p + dos_header->e_lfanew +
IMAGE_SIZEOF_SIGNATURE +
IMAGE_SIZEOF_FILE_HEADER +
file_header->SizeOfOptionalHeader);

DWORD export_foa = rva_to_foa(optional_header->DataDirectory[0].VirtualAddress, section_table);

PIMAGE_EXPORT_DIRECTORY export_directory = (PIMAGE_EXPORT_DIRECTORY)(p + export_foa);


DWORD name_list_foa = rva_to_foa(export_directory->AddressOfNames, section_table);

PDWORD name_list = (PDWORD)(p + name_list_foa);




std::vector<std::string> func_list;

for (size_t i = 0; i < export_directory->NumberOfNames; i++, name_list++)
{

DWORD name_foa = rva_to_foa(* name_list, section_table);
char* name = (char*)(p + name_foa);
func_list.emplace_back(name);

}


UnmapViewOfFile(pv_file);
CloseHandle(h_map_file);
CloseHandle(h_file);


std::ofstream gen_cpp_obj;
gen_cpp_obj.open("warp_gen_cpp.cpp", std::ios::out | std::ios::trunc);
gen_cpp_obj << "#include <Windows.h>\n";
gen_cpp_obj << "extern \"C\" \n{\n";


for (size_t i = 0; i < func_list.size(); ++i)
{
auto fun = func_list[i];
gen_cpp_obj << format("void * warp_point_%d;\n", i);
gen_cpp_obj << format("#pragma comment(linker, \"/export:%s=warp_func_%d\")\n", fun.c_str(), i);
gen_cpp_obj << format("void warp_func_%d();\n", i);
gen_cpp_obj << ("\n");
}
gen_cpp_obj << ("}\n");


gen_cpp_obj << ("void load_function(HMODULE hm)\n{\n");
for (size_t i = 0; i < func_list.size(); ++i)
{
auto fun = func_list[i];
gen_cpp_obj << format("warp_point_%d = (void*)GetProcAddress(hm, \"%s\");\n", i, fun.c_str());
}
gen_cpp_obj << ("}\n");

gen_cpp_obj.close();



std::ofstream gen_asm_obj;
gen_asm_obj.open("warp_gen.asm", std::ios::out | std::ios::trunc);
for (size_t i = 0; i < func_list.size(); ++i)
{
auto fun = func_list[i];
gen_asm_obj << format("EXTERN warp_point_%d:QWORD;\n", i);
}
gen_asm_obj << ".CODE\n";
for (size_t i = 0; i < func_list.size(); ++i)
{
auto fun = func_list[i];
gen_asm_obj << format("warp_func_%d PROC\njmp warp_point_%d;\nwarp_func_%d ENDP\n", i,i,i);
}
gen_asm_obj << "END\n";
gen_asm_obj.close();
}
Loading