From 69d5c3291745faa184d7c020ce4b394d41744efd Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Mon, 17 Jun 2019 22:35:38 +0800 Subject: [PATCH] fix(user_dictionary, contextual_translation): fix user phrase quality; order contextual suggestions by type --- src/rime/dict/user_dictionary.cc | 3 +-- src/rime/gear/contextual_translation.cc | 7 +++++-- src/rime/gear/script_translator.cc | 2 +- src/rime/gear/single_char_filter.cc | 2 +- src/rime/gear/speller.cc | 8 ++++++-- src/rime/gear/table_translator.cc | 14 +++++++------- 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/rime/dict/user_dictionary.cc b/src/rime/dict/user_dictionary.cc index 114da68437..6d09ff686d 100644 --- a/src/rime/dict/user_dictionary.cc +++ b/src/rime/dict/user_dictionary.cc @@ -485,8 +485,7 @@ an UserDictionary::CreateDictEntry(const string& key, (double)v.commits / present_tick, (double)present_tick, v.dee); - constexpr double kUser = 13; // log(1e8) - log(200) - e->weight = kUser + log(weight > 0 ? weight : DBL_EPSILON) + credibility; + e->weight = log(weight > 0 ? 
weight : DBL_EPSILON) + credibility; if (full_code) { *full_code = key.substr(0, separator_pos); } diff --git a/src/rime/gear/contextual_translation.cc b/src/rime/gear/contextual_translation.cc index 6266195ec9..7aee15a8b5 100644 --- a/src/rime/gear/contextual_translation.cc +++ b/src/rime/gear/contextual_translation.cc @@ -10,14 +10,17 @@ const int kContextualSearchLimit = 32; bool ContextualTranslation::Replenish() { vector> queue; size_t end_pos = 0; + std::string last_type; while (!translation_->exhausted() && cache_.size() + queue.size() < kContextualSearchLimit) { auto cand = translation_->Peek(); DLOG(INFO) << cand->text() << " cache/queue: " << cache_.size() << "/" << queue.size(); - if (cand->type() == "phrase" || cand->type() == "table") { - if (end_pos != cand->end()) { + if (cand->type() == "phrase" || cand->type() == "user_phrase" || + cand->type() == "table" || cand->type() == "user_table") { + if (end_pos != cand->end() || last_type != cand->type()) { end_pos = cand->end(); + last_type = cand->type(); AppendToCache(queue); } queue.push_back(Evaluate(As(cand))); diff --git a/src/rime/gear/script_translator.cc b/src/rime/gear/script_translator.cc index 4ad68f6d5e..00b4f1104a 100644 --- a/src/rime/gear/script_translator.cc +++ b/src/rime/gear/script_translator.cc @@ -491,7 +491,7 @@ void ScriptTranslation::PrepareCandidate() { DLOG(INFO) << "user phrase '" << entry->text << "', code length: " << user_phrase_code_length; cand = New(translator_->language(), - "phrase", + "user_phrase", start_, start_ + user_phrase_code_length, entry); diff --git a/src/rime/gear/single_char_filter.cc b/src/rime/gear/single_char_filter.cc index 87779329ad..d425ed0777 100644 --- a/src/rime/gear/single_char_filter.cc +++ b/src/rime/gear/single_char_filter.cc @@ -40,7 +40,7 @@ bool SingleCharFirstTranslation::Rearrange() { while (!translation_->exhausted()) { auto cand = translation_->Peek(); auto phrase = As(Candidate::GetGenuineCandidate(cand)); - if (!phrase || 
phrase->type() != "table") { + if (!phrase || (phrase->type() != "table" && phrase->type() != "user_table")) { break; } if (unistrlen(cand->text()) == 1) { diff --git a/src/rime/gear/speller.cc b/src/rime/gear/speller.cc index e0a5b0501a..714a45279c 100644 --- a/src/rime/gear/speller.cc +++ b/src/rime/gear/speller.cc @@ -33,14 +33,18 @@ static bool reached_max_code_length(const an& cand, return code_length >= max_code_length; } +static inline bool is_table_entry(const an& cand) { + const auto& type = Candidate::GetGenuineCandidate(cand)->type(); + return type == "table" || type == "user_table"; +} + static bool is_auto_selectable(const an& cand, const string& input, const string& delimiters) { return // reaches end of input cand->end() == input.length() && - // is table entry - Candidate::GetGenuineCandidate(cand)->type() == "table" && + is_table_entry(cand) && // no delimiters input.find_first_of(delimiters, cand->start()) == string::npos; } diff --git a/src/rime/gear/table_translator.cc b/src/rime/gear/table_translator.cc index 2dcc4524c1..744683bd4c 100644 --- a/src/rime/gear/table_translator.cc +++ b/src/rime/gear/table_translator.cc @@ -74,14 +74,12 @@ an TableTranslation::Peek() { if (options_) { options_->comment_formatter().Apply(&comment); } - auto phrase = New( - language_, - e->remaining_code_length == 0 ? "table" : "completion", - start_, end_, e); + bool incomplete = e->remaining_code_length != 0; + auto type = incomplete ? "completion" : is_user_phrase ? "user_table" : "table"; + auto phrase = New(language_, type, start_, end_, e); if (phrase) { phrase->set_comment(comment); phrase->set_preedit(preedit_); - bool incomplete = e->remaining_code_length != 0; phrase->set_quality(exp(e->weight) + options_->initial_quality() + (incomplete ? 
-1 : 0) + @@ -345,6 +343,7 @@ bool TableTranslator::Memorize(const CommitEntry& commit_entry) { string phrase; for (; it != history.rend(); ++it) { if (it->type != "table" && + it->type != "user_table" && it->type != "sentence" && it->type != "uniquified") break; @@ -464,7 +463,8 @@ an SentenceTranslation::Peek() { } size_t code_length = 0; an entry; - if (PreferUserPhrase()) { + bool is_user_phrase = PreferUserPhrase(); + if (is_user_phrase) { auto r = user_phrase_collector_.rbegin(); code_length = r->first; entry = r->second[user_phrase_index_]; @@ -476,7 +476,7 @@ an SentenceTranslation::Peek() { } auto result = New( translator_ ? translator_->language() : NULL, - "table", + is_user_phrase ? "user_table" : "table", start_, start_ + code_length, entry);