Fix building issue #1

Fan Yu committed Jan 5, 2021
1 parent f10565a commit 8c982d8
Showing 7 changed files with 268 additions and 1,403 deletions.
1 change: 1 addition & 0 deletions HugeCTR/include/inference/embedding_interface.hpp
@@ -20,6 +20,7 @@
#include <thread>
#include <utility>
#include <vector>
+#include <common.hpp>
#include <inference/inference_utils.hpp>

namespace HugeCTR {
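For context on the one-line fix above: this header evidently references declarations that live in common.hpp (the Error_t/CK_THROW_ error-handling machinery used elsewhere in this commit is a likely candidate), so a translation unit that included embedding_interface.hpp without first pulling in common.hpp could fail to compile. A hedged sketch of that failure mode; the consumer file is hypothetical:

```cpp
// hypothetical_consumer.cpp -- before this commit, compiling a file like this
// could break, because embedding_interface.hpp used names from common.hpp
// without including it and relied on its includers' include order.
#include <inference/embedding_interface.hpp>

int main() { return 0; }  // the error surfaced at the #include, not here
```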
226 changes: 153 additions & 73 deletions HugeCTR/include/parser.hpp
@@ -21,13 +21,53 @@
#include <fstream>
#include <functional>
#include <gpu_resource.hpp>
+#include <inference/embedding_feature_combiner.hpp>
#include <learning_rate_scheduler.hpp>
#include <metrics.hpp>
#include <network.hpp>
#include <nlohmann/json.hpp>

namespace HugeCTR {

+nlohmann::json read_json_file(const std::string& filename);

+struct SolverParser {
+  // std::string configure_file;
+  unsigned long long seed;                     /**< seed of data simulator */
+  LrPolicy_t lr_policy;                        /**< only the fixed lr policy is supported now. */
+  int display;                                 /**< the interval of loss display. */
+  int max_iter;                                /**< the number of iterations for training */
+  int num_epochs;                              /**< the number of epochs for training */
+  int snapshot;                                /**< the number of iterations for a snapshot */
+  std::string snapshot_prefix;                 /**< naming prefix of snapshot file */
+  int eval_interval;                           /**< the interval of evaluations */
+  int eval_batches;                            /**< the number of batches for evaluations */
+  int batchsize_eval;                          /**< batchsize for eval */
+  int batchsize;                               /**< batchsize */
+  std::string model_file;                      /**< name of model file */
+  std::vector<std::string> embedding_files;    /**< names of embedding files */
+  std::vector<std::vector<int>> vvgpu;         /**< device map */
+  bool use_mixed_precision;
+  float scaler;
+  std::map<metrics::Type, float> metrics_spec;
+  bool i64_input_key;
+  bool use_algorithm_search;
+  bool use_cuda_graph;
+  SolverParser(const std::string& file);
+  SolverParser() {}
+};
+struct InferenceParser {
+  // std::string configure_file;
+  size_t max_batchsize;                        /**< maximum batchsize */
+  std::string dense_model_file;                /**< name of dense model file */
+  std::vector<std::string> sparse_model_files; /**< names of embedding files */
+  bool use_mixed_precision;
+  float scaler;
+  bool use_algorithm_search;
+  bool use_cuda_graph;
+  InferenceParser(const nlohmann::json& config);
+};
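As an aside, a minimal usage sketch of the two structs added above — hypothetical, not part of this header; the file names are placeholders:

```cpp
#include <iostream>
#include <parser.hpp>  // assumed include path for this header

// Hypothetical driver: SolverParser parses the solver clause straight from a
// config file path, while InferenceParser takes an already-parsed JSON object
// (read_json_file is declared above).
void load_configs() {
  SolverParser solver("dlrm_train.json");
  nlohmann::json conf = read_json_file("dlrm_infer.json");
  InferenceParser inference(conf);
  std::cout << "max inference batch size: " << inference.max_batchsize << std::endl;
}
```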

/**
* @brief The parser of the configure file (in JSON format).
*
@@ -51,42 +91,37 @@ class Parser {
const bool use_algorithm_search_;
const bool use_cuda_graph_;

template <typename TypeKey>
void create_pipeline_internal(std::shared_ptr<IDataReader>& data_reader,
std::shared_ptr<IDataReader>& data_reader_eval,
std::vector<std::shared_ptr<IEmbedding>>& embedding,
std::vector<std::unique_ptr<Network>>& network,
const std::shared_ptr<ResourceManager>& resource_manager);

+  template <typename TypeEmbeddingComp>
+  void create_pipeline_inference(const InferenceParser& inference_parser,
+                                 Tensor2<float>& dense_input,
+                                 std::vector<std::shared_ptr<Tensor2<int>>>& rows,
+                                 std::vector<std::shared_ptr<Tensor2<float>>>& embeddingvecs,
+                                 std::vector<size_t>& embedding_table_slot_size,
+                                 std::vector<std::shared_ptr<Layer>>* embedding, Network** network,
+                                 const std::shared_ptr<ResourceManager> resource_manager);

public:
std::vector<TensorEntry> tensor_entries;
+  /**
+   * Ctor.
+   * The ctor only verifies the configure file; it doesn't create the pipeline.
+   */
+  Parser(const std::string& configure_file, size_t batch_size, size_t batch_size_eval,
+         bool repeat_dataset, bool i64_input_key = false, bool use_mixed_precision = false,
+         float scaler = 1.0f, bool use_algorithm_search = true, bool use_cuda_graph = true);

-  Parser(const std::string& configure_file,
-         size_t batch_size,
-         size_t batch_size_eval,
-         bool repeat_dataset,
-         bool i64_input_key = false,
-         bool use_mixed_precision = false,
-         float scaler = 1.0f,
-         bool use_algorithm_search = true,
-         bool use_cuda_graph = true)
-      : batch_size_(batch_size),
-        batch_size_eval_(batch_size_eval),
-        repeat_dataset_(repeat_dataset),
-        i64_input_key_(i64_input_key),
-        use_mixed_precision_(use_mixed_precision),
-        scaler_(scaler),
-        use_algorithm_search_(use_algorithm_search),
-        use_cuda_graph_(use_cuda_graph) {
-    try {
-      std::ifstream file(configure_file);
-      if (!file.is_open()) {
-        CK_THROW_(Error_t::FileCannotOpen, "file.is_open() failed: " + configure_file);
-      }
-      file >> config_;
-      file.close();
-    } catch (const std::runtime_error& rt_err) {
-      std::cerr << rt_err.what() << std::endl;
-      throw;
-    }
-    return;
-  }
+  /**
+   * Ctor.
+   * Ctor used in the inference stage.
+   */
+  Parser(const nlohmann::json& config);

/**
* Create the pipeline, which includes the data reader and embedding.
@@ -97,14 +132,15 @@
std::vector<std::unique_ptr<Network>>& network,
const std::shared_ptr<ResourceManager>& resource_manager);

-  template <typename TypeKey>
-  friend void create_pipeline_internal(std::shared_ptr<IDataReader>& data_reader,
-                                       std::shared_ptr<IDataReader>& data_reader_eval,
-                                       std::vector<std::shared_ptr<IEmbedding>>& embedding,
-                                       std::vector<std::unique_ptr<Network>>& network,
-                                       const std::shared_ptr<ResourceManager>& resource_manager,
-                                       Parser& parser);

+  /**
+   * Create the inference pipeline, which only creates the network and embedding.
+   */
+  void create_pipeline(const InferenceParser& inference_parser, Tensor2<float>& dense_input,
+                       std::vector<std::shared_ptr<Tensor2<int>>>& row,
+                       std::vector<std::shared_ptr<Tensor2<float>>>& embeddingvec,
+                       std::vector<size_t>& embedding_table_slot_size,
+                       std::vector<std::shared_ptr<Layer>>* embedding, Network** network,
+                       const std::shared_ptr<ResourceManager> resource_manager);
};
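For orientation, a hedged sketch of driving the training-side pipeline with the class above, assuming the public create_pipeline mirrors create_pipeline_internal's parameter list; the config path and batch sizes are placeholders:

```cpp
// Hypothetical driver; resource_manager is assumed to be created elsewhere.
void build_training_pipeline(const std::shared_ptr<ResourceManager>& resource_manager) {
  Parser parser("dlrm_train.json", /*batch_size=*/16384, /*batch_size_eval=*/16384,
                /*repeat_dataset=*/true);
  std::shared_ptr<IDataReader> data_reader, data_reader_eval;
  std::vector<std::shared_ptr<IEmbedding>> embeddings;
  std::vector<std::unique_ptr<Network>> networks;
  // Internally dispatches on i64_input_key to create_pipeline_internal<TypeKey>.
  parser.create_pipeline(data_reader, data_reader_eval, embeddings, networks,
                         resource_manager);
}
```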

std::unique_ptr<LearningRateScheduler> get_learning_rate_scheduler(
@@ -114,32 +150,6 @@ std::unique_ptr<LearningRateScheduler> get_learning_rate_scheduler(
* Solver Parser.
* This class is designed to parse the solver clause of the configure file.
*/
-struct SolverParser {
-  // std::string configure_file;
-  unsigned long long seed;                  /**< seed of data simulator */
-  LrPolicy_t lr_policy;                     /**< the only fixed lr is supported now. */
-  int display;                              /**< the interval of loss display. */
-  int max_iter;                             /**< the number of iterations for training */
-  int num_epochs;                           /**< the number of epochs for training */
-  int snapshot;                             /**< the number of iterations for a snapshot */
-  std::string snapshot_prefix;              /**< naming prefix of snapshot file */
-  int eval_interval;                        /**< the interval of evaluations */
-  int eval_batches;                         /**< the number of batches for evaluations */
-  int batchsize_eval;                       /**< batchsize for eval */
-  int batchsize;                            /**< batchsize */
-  std::string model_file;                   /**< name of model file */
-  std::vector<std::string> embedding_files; /**< name of embedding file */
-  std::vector<std::vector<int>> vvgpu;      /**< device map */
-  bool use_mixed_precision;
-  float scaler;
-  std::map<metrics::Type, float> metrics_spec;
-  bool i64_input_key;
-  bool use_algorithm_search;
-  bool use_cuda_graph;
-  SolverParser(const std::string& file);
-  SolverParser(){}
-};


template <typename T>
struct SparseInput {
@@ -186,16 +196,57 @@ struct SparseInput {
} \
} while (0)

+const std::map<std::string, Layer_t> LAYER_TYPE_MAP = {
+    {"BatchNorm", Layer_t::BatchNorm},
+    {"BinaryCrossEntropyLoss", Layer_t::BinaryCrossEntropyLoss},
+    {"Concat", Layer_t::Concat},
+    {"CrossEntropyLoss", Layer_t::CrossEntropyLoss},
+    {"Dropout", Layer_t::Dropout},
+    {"ELU", Layer_t::ELU},
+    {"InnerProduct", Layer_t::InnerProduct},
+    {"Interaction", Layer_t::Interaction},
+    {"MultiCrossEntropyLoss", Layer_t::MultiCrossEntropyLoss},
+    {"ReLU", Layer_t::ReLU},
+    {"Reshape", Layer_t::Reshape},
+    {"Sigmoid", Layer_t::Sigmoid},
+    {"Slice", Layer_t::Slice},
+    {"Multiply", Layer_t::Multiply},
+    {"FmOrder2", Layer_t::FmOrder2},
+    {"Add", Layer_t::Add},
+    {"ReduceSum", Layer_t::ReduceSum},
+    {"MultiCross", Layer_t::MultiCross},
+    {"DotProduct", Layer_t::DotProduct}};
+const std::map<std::string, Layer_t> LAYER_TYPE_MAP_MP = {
+    {"BinaryCrossEntropyLoss", Layer_t::BinaryCrossEntropyLoss},
+    {"Concat", Layer_t::Concat},
+    {"Cast", Layer_t::Cast},
+    {"InnerProduct", Layer_t::InnerProduct},
+    {"FusedInnerProduct", Layer_t::FusedInnerProduct},
+    {"Interaction", Layer_t::Interaction},
+    {"Reshape", Layer_t::Reshape},
+    {"Sigmoid", Layer_t::Sigmoid},
+    {"Slice", Layer_t::Slice},
+    {"ReLU", Layer_t::ReLU},
+    {"Dropout", Layer_t::Dropout},
+    {"Add", Layer_t::Add}};
+const std::map<std::string, Embedding_t> EMBEDDING_TYPE_MAP = {
+    {"DistributedSlotSparseEmbeddingHash", Embedding_t::DistributedSlotSparseEmbeddingHash},
+    {"LocalizedSlotSparseEmbeddingHash", Embedding_t::LocalizedSlotSparseEmbeddingHash},
+    {"LocalizedSlotSparseEmbeddingOneHot", Embedding_t::LocalizedSlotSparseEmbeddingOneHot}};
+const std::map<std::string, Initializer_t> INITIALIZER_TYPE_MAP = {
+    {"Uniform", Initializer_t::Uniform},
+    {"XavierNorm", Initializer_t::XavierNorm},
+    {"XavierUniform", Initializer_t::XavierUniform},
+    {"Zero", Initializer_t::Zero}};

static const std::map<std::string, Optimizer_t> OPTIMIZER_TYPE_MAP = {
{"Adam", Optimizer_t::Adam},
{"MomentumSGD", Optimizer_t::MomentumSGD},
{"Nesterov", Optimizer_t::Nesterov},
{"SGD", Optimizer_t::SGD}};

static const std::map<std::string, Update_t> UPDATE_TYPE_MAP = {
{"Local", Update_t::Local},
{"Global", Update_t::Global},
{"LazyGlobal", Update_t::LazyGlobal}};
{"Local", Update_t::Local}, {"Global", Update_t::Global}, {"LazyGlobal", Update_t::LazyGlobal}};

static const std::map<std::string, Regularizer_t> REGULARIZER_TYPE_MAP = {
{"L1", Regularizer_t::L1},
@@ -235,11 +286,40 @@ inline T get_value_from_json_soft(const nlohmann::json& json, const std::string
}
}
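The body of get_value_from_json_soft is folded away by the diff view (only its closing braces survive above); for readability, a plausible reconstruction of the conventional soft-get pattern such helpers implement — an assumption, not the verbatim source:

```cpp
// Assumed shape: return the typed value when the key exists, else a default.
template <typename T>
inline T get_value_from_json_soft(const nlohmann::json& json, const std::string& key,
                                  T default_value) {
  if (json.find(key) != json.end()) {
    return json[key].get<T>();
  }
  return default_value;
}
```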

-void parse_data_layer_helper(const nlohmann::json& j, int& label_dim, int& dense_dim,
-                             Check_t& check_type, std::string& source_data,
-                             std::vector<DataReaderSparseParam>& data_reader_sparse_param_array,
-                             std::string& eval_source, std::string& top_strs_label,
-                             std::string& top_strs_dense, std::vector<std::string>& sparse_names,
-                             std::map<std::string, SparseInput<long long>>& sparse_input_map);
+template <typename Type>
+struct get_optimizer_param {
+  OptParams<Type> operator()(const nlohmann::json& j_optimizer);
+};

+template <typename TypeKey, typename TypeFP>
+struct create_embedding {
+  void operator()(std::map<std::string, SparseInput<TypeKey>>& sparse_input_map,
+                  std::vector<TensorEntry>* tensor_entries_list,
+                  std::vector<std::shared_ptr<IEmbedding>>& embedding, Embedding_t embedding_type,
+                  const nlohmann::json& config,
+                  const std::shared_ptr<ResourceManager>& resource_manager, size_t batch_size,
+                  size_t batch_size_eval, bool use_mixed_precision, float scaler,
+                  const nlohmann::json& j_layers);
+
+  void operator()(const InferenceParser& inference_parser, const nlohmann::json& j_layers_array,
+                  std::vector<std::shared_ptr<Tensor2<int>>>& rows,
+                  std::vector<std::shared_ptr<Tensor2<float>>>& embeddingvecs,
+                  std::vector<size_t>& embedding_table_slot_size,
+                  std::vector<TensorEntry>* tensor_entries,
+                  std::vector<std::shared_ptr<Layer>>* embeddings,
+                  const std::shared_ptr<GPUResource> gpu_resource,
+                  std::shared_ptr<GeneralBuffer2<CudaAllocator>>& blobs_buff);
+};

+template <typename TypeKey>
+struct create_datareader {
+  void operator()(const nlohmann::json& j,
+                  std::map<std::string, SparseInput<TypeKey>>& sparse_input_map,
+                  std::vector<TensorEntry>* tensor_entries_list,
+                  std::shared_ptr<IDataReader>& data_reader,
+                  std::shared_ptr<IDataReader>& data_reader_eval, size_t batch_size,
+                  size_t batch_size_eval, bool use_mixed_precision, bool repeat_dataset,
+                  const std::shared_ptr<ResourceManager> resource_manager);
+};
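The functor templates above are instantiated per key type; a hedged sketch of how create_datareader is typically dispatched, following the i64_input_key flag used throughout this header (the wrapper and map names are placeholders):

```cpp
// Hypothetical wrapper showing the dispatch; parameters mirror the
// operator() signature declared above.
void dispatch_create_datareader(
    bool i64_input_key, const nlohmann::json& j,
    std::map<std::string, SparseInput<long long>>& sparse_input_map_i64,
    std::map<std::string, SparseInput<unsigned int>>& sparse_input_map_u32,
    std::vector<TensorEntry>* tensor_entries_list, std::shared_ptr<IDataReader>& data_reader,
    std::shared_ptr<IDataReader>& data_reader_eval, size_t batch_size, size_t batch_size_eval,
    bool use_mixed_precision, bool repeat_dataset,
    const std::shared_ptr<ResourceManager> resource_manager) {
  if (i64_input_key) {
    create_datareader<long long>()(j, sparse_input_map_i64, tensor_entries_list, data_reader,
                                   data_reader_eval, batch_size, batch_size_eval,
                                   use_mixed_precision, repeat_dataset, resource_manager);
  } else {
    create_datareader<unsigned int>()(j, sparse_input_map_u32, tensor_entries_list, data_reader,
                                      data_reader_eval, batch_size, batch_size_eval,
                                      use_mixed_precision, repeat_dataset, resource_manager);
  }
}
```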

} // namespace HugeCTR
5 changes: 3 additions & 2 deletions HugeCTR/src/inference/embedding_interface.cpp
@@ -15,6 +15,7 @@
*/

#include <inference/embedding_interface.hpp>
+#include <inference/embedding_cache.hpp>

namespace HugeCTR{

@@ -39,13 +40,13 @@ embedding_interface* embedding_interface::Create_Embedding_Cache(HugectrUtility<
return embedding_cache;
}

-template embedding_interface* embedding_interface::Create_Embedding_Cache<unsigned int>(HugectrUtility<unsigned int>*
+template embedding_interface* embedding_interface::Create_Embedding_Cache<unsigned int>(HugectrUtility<unsigned int>*,
int,
bool,
float,
const std::string&,
const std::string&);
-template embedding_interface* embedding_interface::Create_Embedding_Cache<long long>(HugectrUtility<long long>*
+template embedding_interface* embedding_interface::Create_Embedding_Cache<long long>(HugectrUtility<long long>*,
int,
bool,
float,
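The two changed lines above are the heart of the build fix: each explicit template instantiation was missing the comma after its first parameter type, which makes the declaration ill-formed. A minimal illustration of the rule with a hypothetical function:

```cpp
// Explicit instantiation parameter lists are comma-separated type names.
template <typename T>
T* pick_first(T* array, int /*n*/) { return array; }

template int* pick_first<int>(int*, int);    // well-formed
// template int* pick_first<int>(int* int);  // ill-formed: missing comma --
//                                           // the same mistake fixed above
```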
2 changes: 1 addition & 1 deletion HugeCTR/src/inference/gpu_cache/nv_gpu_cache.cu
@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "<inference/gpu_cache/nv_gpu_cache.hpp>"
#include <inference/gpu_cache/nv_gpu_cache.hpp>

// Overload CUDA atomics for other 64-bit unsigned/signed integer types
__forceinline__
1 change: 1 addition & 0 deletions HugeCTR/src/inference/inference_utilis.cpp
@@ -15,6 +15,7 @@
*/

#include <inference/inference_utils.hpp>
+#include <inference/parameter_server.hpp>

namespace HugeCTR {
template <typename TypeHashKey>
4 changes: 2 additions & 2 deletions HugeCTR/src/inference/parameter_server.cpp
@@ -84,7 +84,7 @@ parameter_server<TypeHashKey>::parameter_server(const std::string& framework_nam

if(ps_config_.distributed_emb_.size() != model_config_path.size() ||
ps_config_.embedding_vec_size_.size() != model_config_path.size() ||
-   ps_config_.default_emb_vec_value_.size() != model_config_path.size())){
+   ps_config_.default_emb_vec_value_.size() != model_config_path.size()){
CK_THROW_(Error_t::WrongInput, "Wrong input: The size of parameter server parameters are not correct.");
}

@@ -139,7 +139,7 @@ parameter_server<TypeHashKey>::parameter_server(const std::string& framework_nam
for(size_t pair = 0; pair < row_num; pair++){
// Read out the emb_id, slot_id and emb_vec
emb_file.read(reinterpret_cast<char *>(&read_key), sizeof(TypeHashKey));
-      emb_file.read(reinterpret_cast<char *>(&slod_id), sizeof(size_t));
+      emb_file.read(reinterpret_cast<char *>(&read_slod_id), sizeof(size_t));
emb_file.read(reinterpret_cast<char *>(read_emb_vec.data()),
sizeof(float) * ps_config_.embedding_vec_size_[i][j]);

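The reads above imply that each record in a sparse model file packs the key, the slot id, and the embedding vector back to back; a sketch of the layout and the per-record size, assuming tightly packed records:

```cpp
// Implied on-disk layout, repeated row_num times per embedding file:
//   [TypeHashKey key][size_t slot_id][float emb_vec[embedding_vec_size]]
// Hypothetical size of one record (TypeHashKey is unsigned int or long long):
template <typename TypeHashKey>
size_t record_bytes(size_t embedding_vec_size) {
  return sizeof(TypeHashKey) + sizeof(size_t) + sizeof(float) * embedding_vec_size;
}
```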
(The diff for the seventh changed file is not included in this view.)
