Commit

Merge branch 'refactor-rm-offline-inference-minseokl' into 'main'
Remove offline inference at the code level

See merge request dl/hugectr/hugectr!1466
minseokl committed Sep 11, 2023
2 parents 7f9763f + 038c538 commit ac75c65
Showing 127 changed files with 279 additions and 14,530 deletions.
16 changes: 2 additions & 14 deletions .gitlab-ci.yml
@@ -157,7 +157,8 @@ build_inference:
DST_IMAGE: $INFER_IMAGE_VERSIONED
CMAKE_OPTION: "-DENABLE_INFERENCE=ON -DCMAKE_BUILD_TYPE=Release -DSM=\"70;75;80;90\" -DCLANGFORMAT=OFF"
BUILD_HUGECTR: 1
BUILD_HUGECTR_BACKEND: 1
BUILD_HPS_BACKEND: 1
HUGECTR_BACKEND_VER: main
TRITON_BRANCH: ${TARGET_TRITON_BRANCH}

build_sok_tf2:
@@ -530,19 +531,6 @@ e2e_nvt_regression_test:
DGXNNODES: 1
TEST_CMD: ./ci/integration_test/nvt/nvt_regression_test.sub

notebook_hugectr:
extends: .cluster_test_job_daily
needs:
- build_train_single_node
variables:
GPFSFOLDER: $LOGDIR/notebook_hugectr
GIT_CLONE_PATH: ${GIT_CLONE_PATH_SELENE}
CONT: $TRAIN_IMAGE_VERSIONED
MOUNTS: /lustre/fsw/devtech/hpc-hugectr/criteo_1TB/day_0:/workdir/tools/day_0,/lustre/fsw/devtech/hpc-hugectr/criteo_1TB/day_1:/workdir/tools/day_1
WALLTIME: "01:00:00"
DGXNNODES: 1
TEST_CMD: ./ci/integration_test/notebooks/notebook_hugectr.sub

nb_hps_demo:
extends: .cluster_test_job_daily
needs:
11 changes: 7 additions & 4 deletions CMakeLists.txt
@@ -117,6 +117,13 @@ find_package(OpenMP REQUIRED)
find_package(Threads)

option(ENABLE_MULTINODES "Enable multi-nodes training" OFF)
option(ENABLE_INFERENCE "Enable Inference" OFF)

if(ENABLE_MULTINODES AND ENABLE_INFERENCE)
message(WARNING "Inference can be only enabled with the multi-node mode off. Set ENABLE_MULTINODES=OFF")
set(ENABLE_MULTINODES OFF)
endif()

if(ENABLE_MULTINODES)
message(STATUS "Multi Node Enabled")
find_package(MPI)
@@ -141,7 +148,6 @@ if (KEY_HIT_RATIO)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DKEY_HIT_RATIO")
endif()

option(ENABLE_INFERENCE "Enable Inference" OFF)
if(ENABLE_INFERENCE)
add_definitions(-DLIBCUDACXX_VERSION)
endif()
@@ -324,11 +330,8 @@ if(ENABLE_INFERENCE)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DENABLE_INFERENCE")
add_subdirectory(HugeCTR/core23)
add_subdirectory(HugeCTR/src/hps)
add_subdirectory(HugeCTR/src/inference)
add_subdirectory(HugeCTR/src/inference_benchmark)
add_subdirectory(HugeCTR/src/cpu)
add_subdirectory(test/utest/hps)
add_subdirectory(test/utest/inference)
else()
#setting binary files install path
add_subdirectory(HugeCTR/src)
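
The CMakeLists.txt hunks above relocate the ENABLE_INFERENCE option ahead of its first use and add a guard that disables multi-node training whenever the inference build is requested. The following is a minimal, standalone CMake sketch of that behaviour, not part of the commit; the demo project name is hypothetical, and only the option/guard pattern is taken from the diff.

    cmake_minimum_required(VERSION 3.17)
    project(option_guard_demo LANGUAGES NONE)

    # Same option names and defaults as in the HugeCTR CMakeLists.txt.
    option(ENABLE_MULTINODES "Enable multi-nodes training" OFF)
    option(ENABLE_INFERENCE "Enable Inference" OFF)

    # Configuring with -DENABLE_INFERENCE=ON -DENABLE_MULTINODES=ON prints the
    # warning and forces the multi-node option off, so the inference build wins.
    if(ENABLE_MULTINODES AND ENABLE_INFERENCE)
      message(WARNING "Inference can be only enabled with the multi-node mode off. Set ENABLE_MULTINODES=OFF")
      set(ENABLE_MULTINODES OFF)
    endif()

    if(ENABLE_MULTINODES)
      message(STATUS "Multi Node Enabled")
    endif()

As in the diff, set() here creates a regular variable that shadows the cached option for the remainder of the configure run; the cache entry itself still records the value the user passed in.
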
11 changes: 0 additions & 11 deletions HugeCTR/include/core23_network.hpp
@@ -111,17 +111,6 @@ class Core23TempNetwork final {
*/
void upload_params_to_device(const std::string& model_file);

/**
* Read parameters from model_file.
*/
void upload_params_to_device_inference(const std::string& model_file);

/**
* Read non-trainable parameters from model_file, e.g., running mean and running variable for
* BatchNorm
*/
void upload_non_trainable_params_to_device_inference(const std::string& model_file);

/**
* Writing parameters to cpu buffer.
*/
35 changes: 0 additions & 35 deletions HugeCTR/include/cpu/create_embedding_cpu.hpp

This file was deleted.

43 changes: 0 additions & 43 deletions HugeCTR/include/cpu/create_pipeline_cpu.hpp

This file was deleted.

81 changes: 0 additions & 81 deletions HugeCTR/include/cpu/embedding_feature_combiner_cpu.hpp

This file was deleted.

64 changes: 0 additions & 64 deletions HugeCTR/include/cpu/inference_session_cpu.hpp

This file was deleted.

60 changes: 0 additions & 60 deletions HugeCTR/include/cpu/layer_cpu.hpp

This file was deleted.

(The remaining changed files are not shown.)
