Skip to content

Commit

Permalink
chore: log dict file info and line number info when deploying warning
Browse files Browse the repository at this point in the history
  • Loading branch information
fxliang authored Jun 18, 2024
1 parent 79dda08 commit 1e14721
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
18 changes: 14 additions & 4 deletions src/rime/dict/entry_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ void EntryCollector::LoadPresetVocabulary(DictSettings* settings) {

void EntryCollector::Collect(const path& dict_file) {
LOG(INFO) << "collecting entries from " << dict_file;
current_dict_file = dict_file.u8string();
line_number = 0;
// read table
std::ifstream fin(dict_file.c_str());
DictSettings settings;
Expand All @@ -69,13 +71,15 @@ void EntryCollector::Collect(const path& dict_file) {
int weight_column = settings.GetColumnIndex("weight");
int stem_column = settings.GetColumnIndex("stem");
if (text_column == -1) {
LOG(ERROR) << "missing text column definition.";
LOG(ERROR) << "missing text column definition in file: " << dict_file
<< ".";
return;
}
bool enable_comment = true;
string line;
while (getline(fin, line)) {
boost::algorithm::trim_right(line);
line_number++;
// skip empty lines and comments
if (line.empty())
continue;
Expand All @@ -90,7 +94,9 @@ void EntryCollector::Collect(const path& dict_file) {
auto row = strings::split(line, "\t");
int num_columns = static_cast<int>(row.size());
if (num_columns <= text_column || row[text_column].empty()) {
LOG(WARNING) << "Missing entry text at #" << num_entries << ".";
LOG(WARNING) << "Missing entry text at #" << num_entries
<< ", line: " << line_number
<< " of file: " << current_dict_file << ".";
continue;
}
const auto& word(row[text_column]);
Expand Down Expand Up @@ -168,15 +174,19 @@ void EntryCollector::CreateEntry(const string& word,
try {
percentage = std::stod(weight_str.substr(0, weight_str.length() - 1));
} catch (...) {
LOG(WARNING) << "invalid entry definition at #" << num_entries << ".";
LOG(WARNING) << "invalid entry definition at #" << num_entries
<< ", line: " << line_number
<< " of file: " << current_dict_file << ".";
percentage = 100.0;
}
e->weight *= percentage / 100.0;
} else if (!weight_str.empty()) { // absolute weight
try {
e->weight = std::stod(weight_str);
} catch (...) {
LOG(WARNING) << "invalid entry definition at #" << num_entries << ".";
LOG(WARNING) << "invalid entry definition at #" << num_entries
<< ", line: " << line_number
<< " of file: " << current_dict_file << ".";
e->weight = 0.0;
}
}
Expand Down
4 changes: 4 additions & 0 deletions src/rime/dict/entry_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ class EntryCollector : public PhraseCollector {
set<string /* word */> collection;
WordMap words;
WeightMap total_weight;

private:
string current_dict_file;
size_t line_number;
};

} // namespace rime
Expand Down

0 comments on commit 1e14721

Please sign in to comment.