Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(web): check for low-probability exact + exact-key correction matches 📚 #11876

Merged
merged 6 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions common/models/templates/src/trie-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class Traversal implements LexiconTraversal {

// Split into individual code units.
let steps = char.split('');
let traversal: ReturnType<Traversal["_child"]> = this;
let traversal: Traversal | undefined = this;

while(steps.length > 0 && traversal) {
const step: string = steps.shift()!;
Expand Down Expand Up @@ -148,6 +148,10 @@ class Traversal implements LexiconTraversal {

*children(): Generator<{char: USVString, traversal: () => LexiconTraversal}> {
let root = this.root;

// We refer to the field multiple times in this method, and it doesn't change.
// This also assists minification a bit, since we can't minify when re-accessing
// through `this.`.
const totalWeight = this.totalWeight;

if(root.type == 'internal') {
Expand Down Expand Up @@ -220,11 +224,10 @@ class Traversal implements LexiconTraversal {
}

get entries() {
const totalWeight = this.totalWeight;
const entryMapper = function(value: Entry) {
const entryMapper = (value: Entry) => {
return {
text: value.content,
p: value.weight / totalWeight
p: value.weight / this.totalWeight
}
}

Expand Down Expand Up @@ -425,7 +428,7 @@ interface Entry {
* Wrapper class for the trie and its nodes.
*/
class Trie {
private root: Node;
public readonly root: Node;
/** The total weight of the entire trie. */
readonly totalWeight: number;
/**
Expand All @@ -451,9 +454,26 @@ class Trie {
* @param prefix
*/
lookup(prefix: string): TextWithProbability[] {
let searchKey = this.toKey(prefix);
let rootTraversal = this.traverseFromRoot().child(searchKey);
return rootTraversal ? getSortedResults(rootTraversal) : [];
const searchKey = this.toKey(prefix);
const rootTraversal = this.traverseFromRoot().child(searchKey);

if(!rootTraversal) {
return [];
}

const directEntries = rootTraversal.entries;
// `Set` requires Chrome 38+, which is more recent than Chrome 35.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are moving away from ES5 (#11881), does that bump our minimum version of Chrome?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does. This PR's main content was written months ago, before we were looking at directly dropping it. #11881 isn't yet merged, either.

const directSet: Record<string, string> = {};
for(const entry of directEntries) {
directSet[entry.text] = entry.text;
}

const bestEntries = getSortedResults(rootTraversal);
const deduplicated = bestEntries.filter((entry) => !directSet[entry.text]);

// Any entries directly hosted on the current node should get full display
// priority over anything from its descendants.
return directEntries.concat(deduplicated);
}

/**
Expand Down
15 changes: 8 additions & 7 deletions common/models/templates/test/test-trie-traversal.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ var smpForUnicode = function(code){
return String.fromCharCode(H, L);
}

// Prob: entry weight / total weight
// "the" is the highest-weighted word in the fixture.
const PROB_OF_THE = 1000 / 500500;
const PROB_OF_TRUE = 607 / 500500;
const PROB_OF_TROUBLE = 267 / 500500;

describe('Trie traversal abstractions', function() {
it('root-level iteration over child nodes', function() {
var model = new TrieModel(jsonFixture('tries/english-1000'));
Expand All @@ -39,10 +45,6 @@ describe('Trie traversal abstractions', function() {
it('traversal with simple internal nodes', function() {
var model = new TrieModel(jsonFixture('tries/english-1000'));

// Prob: entry weight / total weight
// "the" is the highest-weighted word in the fixture.
const PROB_OF_THE = 1000 / 500500;

let rootTraversal = model.traverseFromRoot();
assert.isDefined(rootTraversal);

Expand Down Expand Up @@ -139,7 +141,6 @@ describe('Trie traversal abstractions', function() {

it('traversal over compact leaf node', function() {
var model = new TrieModel(jsonFixture('tries/english-1000'));
const PROB_OF_TROUBLE = 267 / 500500;

let rootTraversal = model.traverseFromRoot();
assert.isDefined(rootTraversal);
Expand All @@ -153,15 +154,15 @@ describe('Trie traversal abstractions', function() {
assert.isDefined(traversalInner1);
assert.isArray(child.traversal().entries);
assert.isEmpty(child.traversal().entries);
assert.equal(traversalInner1.p, 1000 / 500500 /* prob of 'the' */);
assert.equal(traversalInner1.p, PROB_OF_THE);

for(let tChild of traversalInner1.children()) {
if(tChild.char == 'r') {
let traversalInner2 = tChild.traversal();
assert.isDefined(traversalInner2);
assert.isArray(tChild.traversal().entries);
assert.isEmpty(tChild.traversal().entries);
assert.equal(traversalInner2.p, 607 / 500500 /* prob of 'true', the best 'tr-' entry */);
assert.equal(traversalInner2.p, PROB_OF_TRUE);

for(let rChild of traversalInner2.children()) {
if(rChild.char == 'o') {
Expand Down
11 changes: 6 additions & 5 deletions common/models/types/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ type TextWithProbability = {

/**
* The probability of the lexical entry, directly based upon its frequency.
*
* A real-number weight, from 0 to 1.
*/
p: number; // real-number weight, from 0 to 1
p: number;
}

/**
Expand Down Expand Up @@ -68,10 +70,9 @@ declare interface LexiconTraversal {
children(): Generator<{char: USVString, traversal: () => LexiconTraversal}>;

/**
* Allows direct access to the traversal state that results when appending a
* `char` representing one or more individual UTF-16 codepoints to the
* current traversal state's prefix. This bypasses the need to iterate
* among all legal child Traversals.
* Allows direct access to the traversal state that results when appending one
* or more codepoints encoded in UTF-16 to the current traversal state's prefix.
* This allows bypassing iteration among all legal child Traversals.
*
* If such a traversal state is not supported, returns `undefined`.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
[
{
"transform": {
"insert": "distracted ",
"insert": "distracted by ",
"deleteLeft": 0
},
"displayAs": "distracted by"
Expand Down
30 changes: 14 additions & 16 deletions common/test/resources/models/simple-dummy.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,29 @@

Model.punctuation = {
quotesForKeepSuggestion: { open: '“', close: '”'},
// Important! Set this, or else the model compositor will
// insert something for us!
insertAfterWord: "",
insertAfterWord: " ",
};

// A direct import/copy from i_got_distracted_by_hazel.json.
Model.futureSuggestions = [
[
{
"transform": {
"insert": "I ",
"insert": "I",
"deleteLeft": 0
},
"displayAs": "I"
},
{
"transform": {
"insert": "I'm ",
"insert": "I'm",
"deleteLeft": 0
},
"displayAs": "I'm"
},
{
"transform": {
"insert": "Oh ",
"insert": "Oh",
"deleteLeft": 0
},
"displayAs": "Oh"
Expand All @@ -42,21 +40,21 @@
[
{
"transform": {
"insert": "love ",
"insert": "love",
"deleteLeft": 0
},
"displayAs": "love"
},
{
"transform": {
"insert": "am ",
"insert": "am",
"deleteLeft": 0
},
"displayAs": "am"
},
{
"transform": {
"insert": "got ",
"insert": "got",
"deleteLeft": 0
},
"displayAs": "got"
Expand All @@ -65,21 +63,21 @@
[
{
"transform": {
"insert": "distracted ",
"insert": "distracted by",
"deleteLeft": 0
},
"displayAs": "distracted by"
},
{
"transform": {
"insert": "distracted ",
"insert": "distracted",
"deleteLeft": 0
},
"displayAs": "distracted"
},
{
"transform": {
"insert": "a ",
"insert": "a",
"deleteLeft": 0
},
"displayAs": "a"
Expand All @@ -88,27 +86,27 @@
[
{
"transform": {
"insert": "Hazel ",
"insert": "Hazel",
"deleteLeft": 0
},
"displayAs": "Hazel"
},
{
"transform": {
"insert": "the ",
"insert": "the",
"deleteLeft": 0
},
"displayAs": "the"
},
{
"transform": {
"insert": "a ",
"insert": "a",
"deleteLeft": 0
},
"displayAs": "a"
}
]
];
];

return Model;
}());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const appleDummySuggestionSets = [[
], [
// Set 2:
{
transform: { insert: '', deleteLeft: 0},
transform: { insert: 'e', deleteLeft: 0},
displayAs: 'apple',
tag: 'keep'
}, {
Expand Down
Loading
Loading