Skip to content

Commit

Permalink
Compile both raw and object variants of LTO-IR
Browse files Browse the repository at this point in the history
  • Loading branch information
gmarkall committed Apr 19, 2024
1 parent d234d29 commit e5dafbd
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 15 deletions.
5 changes: 5 additions & 0 deletions pynvjitlink/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ def device_functions_ltoir():
return read_test_file("test_device_functions.ltoir")


@pytest.fixture(scope="session")
def device_functions_ltoir_object():
    """Session-scoped fixture for ``test_device_functions.ltoir.o``.

    Returns whatever ``read_test_file`` produces for that file name; the
    tests that consume this fixture unpack the result as
    ``(filename, data)``.
    """
    ltoir_object_name = "test_device_functions.ltoir.o"
    return read_test_file(ltoir_object_name)


@pytest.fixture(scope="session")
def device_functions_object():
    """Session-scoped fixture for ``test_device_functions.o``.

    Returns whatever ``read_test_file`` produces for that file name.
    """
    object_name = "test_device_functions.o"
    return read_test_file(object_name)
Expand Down
28 changes: 14 additions & 14 deletions pynvjitlink/tests/test_pynvjitlink.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ def test_add_file(input_file, input_type, gpu_arch_flag, request):
# We test the LTO input case separately as it requires the `-lto` flag. The
# OBJECT input type is used because the LTO-IR container is packaged in an ELF
# object when produced by NVCC.
def test_add_file_lto(device_functions_ltoir, gpu_arch_flag):
filename, data = device_functions_ltoir
def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag):
filename, data = device_functions_ltoir_object

handle = _nvjitlinklib.create(gpu_arch_flag, "-lto")
_nvjitlinklib.add_data(handle, InputType.OBJECT.value, data, filename)
Expand Down Expand Up @@ -123,11 +123,11 @@ def test_get_linked_cubin_link_not_complete_error(
_nvjitlinklib.destroy(handle)


def test_get_linked_cubin_from_lto(device_functions_ltoir, gpu_arch_flag):
filename, data = device_functions_ltoir
# device_functions_ltoir is a host object containing a fatbin containing an
# LTOIR container, because that is what NVCC produces when LTO is
# requested. So we need to use the OBJECT input type, and the linker
def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag):
filename, data = device_functions_ltoir_object
# device_functions_ltoir_object is a host object containing a fatbin
# containing an LTOIR container, because that is what NVCC produces when
# LTO is requested. So we need to use the OBJECT input type, and the linker
# retrieves the LTO IR from it because we passed the -lto flag.
input_type = InputType.OBJECT.value
handle = _nvjitlinklib.create(gpu_arch_flag, "-lto")
Expand All @@ -140,11 +140,11 @@ def test_get_linked_cubin_from_lto(device_functions_ltoir, gpu_arch_flag):
assert cubin[:4] == b"\x7fELF"


def test_get_linked_ptx_from_lto(device_functions_ltoir, gpu_arch_flag):
filename, data = device_functions_ltoir
# device_functions_ltoir is a host object containing a fatbin containing an
# LTOIR container, because that is what NVCC produces when LTO is
# requested. So we need to use the OBJECT input type, and the linker
def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag):
filename, data = device_functions_ltoir_object
# device_functions_ltoir_object is a host object containing a fatbin
# containing an LTOIR container, because that is what NVCC produces when
# LTO is requested. So we need to use the OBJECT input type, and the linker
# retrieves the LTO IR from it because we passed the -lto flag.
input_type = InputType.OBJECT.value
handle = _nvjitlinklib.create(gpu_arch_flag, "-lto", "-ptx")
Expand All @@ -154,9 +154,9 @@ def test_get_linked_ptx_from_lto(device_functions_ltoir, gpu_arch_flag):
_nvjitlinklib.destroy(handle)


def test_get_linked_ptx_link_not_complete_error(device_functions_ltoir, gpu_arch_flag):
def test_get_linked_ptx_link_not_complete_error(device_functions_ltoir_object, gpu_arch_flag):
handle = _nvjitlinklib.create(gpu_arch_flag, "-lto", "-ptx")
filename, data = device_functions_ltoir
filename, data = device_functions_ltoir_object
input_type = InputType.OBJECT.value
_nvjitlinklib.add_data(handle, input_type, data, filename)
with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"):
Expand Down
5 changes: 4 additions & 1 deletion test_binary_generation/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,14 @@ all:
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/undefined_extern.cubin undefined_extern.cu
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.cubin test_device_functions.cu
nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.fatbin test_device_functions.cu
nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ltoir test_device_functions.cu
nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ptx test_device_functions.cu
nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.o test_device_functions.cu
nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.a test_device_functions.cu

# Generate LTO-IR wrapped in a fatbin inside a host object (what NVCC emits when LTO is requested)
nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ltoir.o test_device_functions.cu
# Generate LTO-IR in a "raw" LTO-IR container
python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $(OUTPUT_DIR)/test_device_functions.ltoir test_device_functions.cu
# We also want to test linking a .cu file; this needs no compilation,
# so copy it instead
cp test_device_functions.cu $(OUTPUT_DIR)

0 comments on commit e5dafbd

Please sign in to comment.