-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathflattenTermSequence.js
1262 lines (1180 loc) · 51.7 KB
/
flattenTermSequence.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
var util = require('../../util/util')
var grammarUtil = require('../grammar/grammarUtil')
/**
* Creates a new, flattened `ruleProps` for the term sequence `subnode`.
*
* The new, flattened, terminal `ruleProp` has the following properties:
* 1. terminal - Mark the new `ruleProps` as terminal so that `pfsearch` uses
* `subnode.ruleProps` to generate display text and does not traverse its
* child nodes.
* • Do not bother deleting `subnode.node.subs`, which would be wasteful
* because the absence of `ruleProps.isNonterminal` prevents `pfsearch`
* from checking `subnode.node.subs` anyway. Also, `subnode.node.subs`
* are needed in the rare case of reparsing, which reuses existing nodes.
* 2. cost - `cost`, the minimum cost for `subnode` as a complete subtree: the
* cumulative cost (including any deletion costs) of the subtree that will
* be inaccessible after assigning this new, terminal `ruleProps`.
* • If there are multiple subtrees, then they are ambiguous because their
* text and semantics are defined here (identically), and the minimum
* cost subtree would be chosen anyway.
* 3. semantic - `subnode.ruleProps.semantic`, if defined. The rules it
* produces can not have semantics.
* 4. text - See below.
* 5. tense - See below.
*
* The new `ruleProps` excludes the following properties which the original
* `ruleProps` can contain prior to flattening, because all are specific to
* nonterminal nodes:
* • isTermSequence - Marks `childNode` that have yet to be flattened.
* • isNonterminal - Excluded to mark the flattened term sequences as terminal
* to prevent `pfsearch` from traversing their child nodes.
* • gramProps - Conjugates the term sequence's display text and discarded
* thereafter.
* • insertedSymIdx - Excluded because flattening term sequences into terminal
* nodes removes the need to traverse the node's children.
* • semanticIsReduced - Excluded because `pfsearch` knows all semantics on
* terminal nodes are reduced.
* • secondRHSCanProduceSemantic - Excluded because only applicable to binary
* nonterminal nodes.
*
* For use by `calcHeuristicCosts` while traversing the parse forest to
* calculate the A* heuristic cost estimates.
*
* @static
* @param {Object} subnode The term sequence subnode for which to generate a
* new, terminal `ruleProps`.
* @param {number} cost The cumulative cost of the subtree `subnode` produces.
* @returns {Object} Returns the new, flattened, terminal `ruleProps` for
* `subnode`.
*/
module.exports = function (subnode, cost) {
var subnodeRuleProps = subnode.ruleProps
/**
* If `subnode` is a term sequence with `text` and `insertedSymIdx`, then it
* is an insertion. Create a new `ruleProps` as explained above, extended as
* follows:
* 4. text - Merge `text` values of the matched terminal rules this
* subnode's single child node produces with `subnode.ruleProps.text`
* according to `subnode.ruleProps.insertedSymIdx`.
* 5. tense - The tense of a matched verb terminal rule `subnode` produces,
* for which the associated text object remains unconjugated, to maintain
* tense if the parent rule of `subnode` has matching `acceptedTense`.
*/
if (subnodeRuleProps.insertedSymIdx !== undefined) {
return createTermSequenceInsertionRuleProps(subnode, cost)
}
/**
* If `subnode` is a term sequence with `text` and no `insertedSymIdx`, then
* it is a multi-token substitution. Create a new `ruleProps` as explained
* above, extended as follows:
* 4. text - Keep `subnode.ruleProps.text` for `pfsearch` to use as display
* text instead of the `text` of the matched terminal rules `subnode`
* produces.
* 5. tense - The tense of a matched verb terminal rule `subnode` produces,
* for which the associated text object remains unconjugated, to maintain
* tense for a verb substitution in `text` if the parent rule of
* `subnode` has matching `acceptedTense`.
*/
if (subnodeRuleProps.text) {
return createTermSequenceSubstitutionRuleProps(subnode, cost)
}
if (subnodeRuleProps.rhsTermSequenceIndexes) {
return createPartialTermSequenceRuleProps(subnode)
}
/**
* If `subnode` is a term sequence (without text), then create a new
* `ruleProps` as explained above, extended as follows:
* 4. text - Merge the `text` values of the matched terminal rules this
* subnode's child nodes produces, which `pfsearch` uses as display text.
* 5. tense - The tense of a matched verb terminal rule `subnode`
* produces, for which the associated text object remains unconjugated,
* to maintain tense if the parent rule of `subnode` has matching
* `acceptedTense`.
* 6. personNumber - The grammatical person-number, if any, with which to
* conjugate nominative verbs that follow `subnode` within its subtree.
*/
return createTermSequenceRuleProps(subnode, cost)
}
/**
* Creates a new `ruleProps` for term sequence `subnode` with the following
* properties:
* 1. terminal - The new `ruleProps` is marked terminal so that `pfsearch`
* uses the new `subnode.ruleProps` to generate display text and does not
* traverse its child nodes.
* 2. cost - Uses `cost`, the cumulative cost (including any deletion costs)
* of the subtree `subnode` produces, which will be inaccessible after
* assigning this new, terminal `ruleProps`
* 3. semantic - `subnode.ruleProps.semantic`, if defined.
* 4. text - Merges the `text` values of the matched terminal rules this
* subnode's child nodes produces, which `pfsearch` uses as display text.
* 5. tense - The tense of a matched verb terminal rule `subnode` produces,
* for which the associated text object remains unconjugated, to maintain
* tense if the parent rule of `subnode` has matching `acceptedTense`.
* 6. personNumber - The grammatical person-number, if any, with which to
* conjugate nominative verbs that follow `subnode` within its subtree.
* 7. anaphoraPersonNumber - The grammatical person-number for anaphoric
* rules, if any, with which to match and copy an antecedent semantic of
* the same person-number.
*
* @private
* @static
* @param {Object} subnode The term sequence subnode for which to generate a
* new, flattened, terminal `ruleProps`.
* @param {number} cost The cumulative cost of the subtree `subnode`
* produces.
* @returns {Object} Returns the new, flattened, terminal `ruleProps` for
* `subnode`.
*/
function createTermSequenceRuleProps(subnode, cost) {
if (isIllFormedTermSequence(subnode)) {
throw new Error('Ill-formed term sequence')
}
var subnodeRuleProps = subnode.ruleProps
var leftSub = getChildSub(subnode)
// Get the display text `leftSub` produces, and conjugate with the
// grammatical properties specific to its RHS index, `parentGramProps[0]`,
// if any.
var parentGramProps = subnodeRuleProps.gramProps
var text = getSubnodeText(leftSub, parentGramProps && parentGramProps[0])
/**
* If `text` remains unconjugated, save input tense from `leftSub`,
* otherwise its `tense` was used in its conjugation.
*
* This check is identical to `!parentGramProps[0]`, because
* `conjugateTextObject()` throws an exception if `parentGramProps[0]`
* exists and it can not conjugate the display text.
*
* It will never be the case where `text` is an array containing a
* conjugative text object, yet the text associated with
* `leftSub.ruleProps.tense` was conjugated, because that would require a
* term sequence with multiple text objects (e.g., a pronoun and a verb),
* which the grammar generator prevents.
*/
var tense = text.constructor !== String && leftSub.ruleProps.tense
// Check if `subnode` is a binary node.
var subnodeNext = subnode.next
if (subnodeNext) {
/**
* `subnode` is a binary node. For example:
* `[contribute-to]` -> `[contribute]` -> "contribute", text: `{contribute-verb-forms}`
* -> `[to]` -> "to"
*/
var rightSub = getChildSub(subnodeNext)
// Get the display text `rightSub` produces, and conjugate with the
// grammatical properties specific to its RHS index, `parentGramProps[1]`,
// if any.
var rightText = getSubnodeText(rightSub, parentGramProps && parentGramProps[1])
text = grammarUtil.mergeTextPair(text, rightText)
// If `text` remains unconjugated, save input input tense from `rightSub`,
// otherwise its `tense` was used in its conjugation.
if (rightText.constructor !== String) {
var rightTense = rightSub.ruleProps.tense
/**
* Alert if both subnodes in a binary rule are verbs with `tense` that
* this rule can not conjugate.
*
* The grammar generator forbids a rule marked
* `ruleProps.isTermSequence` to produce multiple verbs without
* `ruleProps.gramProps` to conjugate at least one of the verbs.
* Prevents merging two conjugative verb text objects, each of which
* can have an input `tense`, and passing the merged text array up to
* a parent node without knowing to which text object which input
* `tense` applies.
*/
if (rightTense && tense) {
printSubnodeErr(subnode, subnode.node.subs, 'Term sequence subnode lacks `text`')
throw new Error('Ill-formed term sequence')
}
tense = rightTense
}
}
/**
* `subnode` is a unary node. For example:
* `[like]` -> `[love]` -> "love", text: `{love-verb-forms}`
*
* Even though such a rule does not require `text` merging, the `text` value
* still must be brought up one level for `gramProps` conjugation, which
* only conjugates the immediate child nodes (without this step, the `text`
* object is two levels below the parent rule with `gramProps`).
*/
return {
// The cumulative cost (including any deletion costs) of the subtree
// `subnode` produces.
cost: cost,
text: text,
/**
* Save `tense` associated with the conjugative verb text object in
* `text`, if any. `tense` is assigned above only when its associated
* verb remains unconjugated, and throws an exception if there are
* multiple (conflicting) `tense` values.
*/
tense: tense,
// Exclude `semanticIsReduced` and `secondRHSCanProduceSemantic`, which
// are specific to nonterminal nodes.
semantic: subnodeRuleProps.semantic,
/**
* The person-number property of `[nom-users]` subjects that conjugates
* nominative verbs that follow `subnode` within its subtree. For example:
* `[nom-users]` -> `[1-sg]`, `me`, personNumber: "oneSg"
*/
personNumber: subnodeRuleProps.personNumber,
/**
* The grammatical person-number for anaphoric rules with which to match
* and copy an antecedent semantic of the same person-number.
*
* For example: "(people who follow `{user}` and like) his|her (repos)"
* `[poss-determiner-sg]` -> `[3-sg-poss-det]` -> "his"
* -> anaphoraPersonNumber: "threeSg"
*
* For example: "(people who follow my followers and like) their (repos)"
* `[poss-determiner-pl]` -> `[3-pl-poss-det]` -> "their"
* -> anaphoraPersonNumber: "threePl"
*/
anaphoraPersonNumber: subnodeRuleProps.anaphoraPersonNumber,
}
}
/**
* Checks if `subnode`, which was passed to `createTermSequenceRuleProps()`,
* is ill-formed. If so, prints an error.
*
* @private
* @static
* @param {Object} subnode The `createTermSequenceRuleProps()` term sequence
* subnode to inspect.
* @returns {boolean} Returns `true` if `subnode` is ill-formed, else
* `false`.
*/
function isIllFormedTermSequence(subnode) {
var subnodeRuleProps = subnode.ruleProps
if (!subnodeRuleProps.isTermSequence) {
util.logError('`createTermSequenceRuleProps()` invoked on subnode without `isTermSequence`:', subnode)
return true
}
if (subnodeRuleProps.insertedSymIdx !== undefined) {
util.logError('`createTermSequenceRuleProps()` invoked on subnode with `insertedSymIdx` (should use `createTermSequenceInsertionRuleProps()`):', subnode)
return true
}
if (subnodeRuleProps.text) {
util.logError('`createTermSequenceRuleProps()` invoked on subnode with substitution text (should use `createTermSequenceSubstitutionRuleProps()`):', subnode)
return true
}
return false
}
function createPartialTermSequenceRuleProps(subnode) {
if (isIllFormedPartialTermSequence(subnode)) {
throw new Error('Ill-formed partial term sequence')
}
var subnodeRuleProps = subnode.ruleProps
var termSequenceRHSIndex = subnodeRuleProps.rhsTermSequenceIndexes[0] ? 0 : 1
var childSub = getChildSub(termSequenceRHSIndex === 0 ? subnode : subnode.next)
/**
* Attempt to conjugate the term sequence's text, if conjugative.
*
* Only pass the sequence's `gramProps` after checking here if conjugation
* will succeed because `conjugateText()` does not support failed
* conjugation attempts and returning a non-string.
* • If `termSequenceRHSIndex` is 1, conjugation may fail if the text
* requires a nominative `personNumber` property from the leading,
* non-sequence branch, which can not be determined until traversed by
* `pfsearch`. For example, the binary pair: `[nom-users+]` `[like]`.
* • Always attempt conjugation if `termSequenceRHSIndex` is 0 because any
* preceding `personNumber` property does not apply because it is outside
* its subtree.
*/
var text
var parentGramProps = subnodeRuleProps.gramProps
var parentGramPropsTerm = parentGramProps && parentGramProps[termSequenceRHSIndex]
if (termSequenceRHSIndex === 0 || (parentGramPropsTerm && (parentGramPropsTerm.form || (parentGramPropsTerm.acceptedTense && parentGramPropsTerm.acceptedTense === childSub.ruleProps.tense)))) {
text = getSubnodeText(childSub, parentGramPropsTerm)
} else {
text = getSubnodeText(childSub)
}
// Temporarily save `gramProps` for the non-sequence subnode. Only exists
// for terminal rules that are not yet implemented as term sequences: `[follow]`.
var newGramProps
var otherParentGramProps = parentGramProps && parentGramProps[+!termSequenceRHSIndex]
if (otherParentGramProps) {
newGramProps = { 0: otherParentGramProps }
}
/**
* Sum `subnodeRuleProps.cost` with the cost of the term sequence
* subnode's branch, which is its `minCost` because only the cheapest
* branch is used because term sequences are ambiguous (no semantics).
*
* Calculate before overwriting `subnode.node` below.
*
* Do not change the remaining subnode's `minCost`, because the value
* excludes the subnode's `ruleProps.cost` itself and is therefore
* unaffected.
*/
var newCost = subnodeRuleProps.cost + (termSequenceRHSIndex === 0 ? subnode.node : subnode.next.node).minCost
/**
* If the left child subnode is the term sequence to insert, replace
* `subnode.node` -> `subnode.next.node` for `pfsearch` to only traverse
* the right child subnode.
*
* Need not delete `subnode.next` after flattening the subnode because
* `pfsearch` does not check for it when it sees
* `ruleProps.insertedSymIdx`.
*/
if (termSequenceRHSIndex === 0) {
subnode.node = subnode.next.node
}
/**
* Create a new `ruleProps` that inserts the term sequence's text at its
* original RHS index.
*
* Exclude the following properties:
* • rhsTermSequenceIndexes - No longer specify `subnode` is a partial
* term sequence. Necessary when reparsing (with all tokens marked
* deletable after the initial parse fails) to prevent
* `calcHeuristicCosts` from attempting to flatten the already flattened
* partial term sequence subnode.
* • gramProps - Only exists for term sequence subnodes, and because the
* remaining subnode is not a term sequence, then `gramProps` must be
* undefined for that RHS index.
* • secondRHSCanProduceSemantic - Only applicable to binary subnodes.
* • tense - Only applicate to terminal `ruleProps`. For use by `pfsearch`
* to conjugate non-insertion text, or by `flattenTermSequence` to
* transfer the property to parent sequences (which partial term sequences
* can not have).
*/
return {
// Unlike other flattened `ruleProps`, partial term sequences rename
// nonterminal because they are insertions.
isNonterminal: true,
// The sum of `subnodeRuleProps.cost` and the (inserted) term sequence
// subnode's cost (`minCost`).
cost: newCost,
// The term sequence insertion text.
text: text,
// The term sequence's RHS index with which position `text`.
insertedSymIdx: termSequenceRHSIndex,
// The semantic properties. `subnodeRuleProps.insertedSemantic` is
// guaranteed to be `undefined` because the insertion is a term sequence,
// which always lack semantics.
semantic: subnodeRuleProps.semantic,
// Likely always `true` because the remaining child subnode is not a term
// sequence. Can be falsey, however, if produces only terminal rules not
// yet implemented as term sequences.
semanticIsReduced: subnodeRuleProps.semanticIsReduced,
/**
* Specify the remaining RHS subnode can produce a semantic.
* • No need to check `subnodeRuleProps.secondRHSCanProduceSemantic`
* (i.e., if `termSequenceRHSIndex` is 0) because the term sequence
* subnode is guaranteed to not produce semantics, hence
* `subnodeRuleProps.rhsCanProduceSemantic` can only apply to the
* (remaining) non-sequence subnode.
* • Likely always `true` because the remaining child subnode is not a
* term sequence. Can be falsey, however, if produces only terminal
* rules not yet implemented as term sequences.
*/
rhsCanProduceSemantic: subnodeRuleProps.rhsCanProduceSemantic,
// Temporarily save `gramProps` for the non-sequence subnode. Only exists
// for terminal rules that are not yet implemented as term sequences:
// `[follow]`.
gramProps: newGramProps,
}
}
/**
* Checks if `subnode`, which was passed to
* `createPartialTermSequenceRuleProps()`, is ill-formed. If so, prints an
* error.
*
* @private
* @static
* @param {Object} subnode The `createPartialTermSequenceRuleProps()`
* partial term sequence subnode to inspect.
* @returns {boolean} Returns `true` if `subnode` is ill-formed, else
* `false`.
*/
function isIllFormedPartialTermSequence(subnode) {
if (!subnode.next) {
util.logError('Partial term sequence subnode is unary:', subnode)
return true
}
var subnodeRuleProps = subnode.ruleProps
if (!subnodeRuleProps.rhsTermSequenceIndexes) {
util.logError('Partial term sequence subnode lacks `rhsTermSequenceIndexes`:', subnode)
return true
}
if (subnodeRuleProps.rhsTermSequenceIndexes[0] && subnodeRuleProps.rhsTermSequenceIndexes[1]) {
util.logError('Partial term sequence subnode specifies both RHS symbols as term sequences:', subnode)
return true
}
if (subnodeRuleProps.insertedSymIdx !== undefined) {
util.logError('Partial term sequence subnode has `insertedSymIdx`:', subnode)
return true
}
if (subnodeRuleProps.text) {
util.logError('Partial term sequence subnode has substitution `text`:', subnode)
return true
}
return false
}
/**
* Creates a new `ruleProps` for term sequence substitution `subnode` with the
* following properties:
* 1. terminal - The new `ruleProps` is marked terminal so that `pfsearch`
* uses the new `subnode.ruleProps` to generate display text and does not
* traverse its child nodes.
* 2. cost - Uses `cost`, the cumulative cost (including any deletion costs)
* of the subtree `subnode` produces, which will be inaccessible after
* assigning this new, terminal `ruleProps`
* 3. semantic - `subnode.ruleProps.semantic`, if defined.
* 4. text - Keeps `subnode.ruleProps.text` for `pfsearch` to use as display
* text instead of the `text` of the matched terminal rules `subnode`
* produces.
* 5. tense - The tense of a matched verb terminal rule `subnode` produces,
* for which the associated text object remains unconjugated, to maintain
* tense for a verb substitution in `text` if the parent rule of `subnode`
* has matching `acceptedTense`.
*
* @private
* @static
* @param {Object} subnode The term sequence substitution subnode for which to
* generate a new, flattened, terminal `ruleProps`.
* @param {number} cost The cumulative cost of the subtree `subnode` produces.
* @returns {Object} Returns the new, flattened, terminal `ruleProps` for
* `subnode`.
*/
function createTermSequenceSubstitutionRuleProps(subnode, cost) {
if (isIllFormedTermSequenceSubstitution(subnode)) {
throw new Error('Ill-formed term sequence substitution')
}
var subnodeRuleProps = subnode.ruleProps
return {
// The cumulative cost (including any deletion costs) of the subtree
// `subnode` produces.
cost: cost,
text: subnodeRuleProps.text,
/**
* Save the input tense of any verb terminal rule `subnode` produces to
* maintain optional tense in substitution `text` if the parent rule of
* `subnode` has matching `acceptedTense`. For example:
* `[contribute-to]` -> "worked on" (input) -> "contributed to" (past tense maintained)
*/
tense: getNonterminalSubstitutionInputTense(subnode),
// Exclude `semanticIsReduced` and `secondRHSCanProduceSemantic`, which
// are specific to nonterminal nodes.
semantic: subnodeRuleProps.semantic,
}
}
/**
* Gets the input tense of any verb terminal rules `subnode` produces to
* maintain tense for any verb substitutions whose parent rule has matching
* `acceptedTense`.
*
* For example:
* `[contribute-to]` -> "worked on" (input) -> "contributed to" (past tense maintained).
*
* `subnode` can be an insertion for a substitution. For example:
* `[contribute-to]` -> "work" (input) -> `[work]` `[on]`, text: `[ {contribute-verb-forms}, "to" ]`
*
* @private
* @static
* @param {Object} subnode The subnode for which to get the `tense` value of
* any matched verb terminal rule it produces.
* @returns {string|undefined} Returns the `tense` of any matched verb
* terminal rule `subnode` produces, else `undefined`.
*/
function getNonterminalSubstitutionInputTense(subnode) {
var leftSub = getChildSub(subnode)
// Check if `subnode` is a binary node, else it is an insertion for a
// substitution.
var subnodeNext = subnode.next
if (subnodeNext) {
var rightSub = getChildSub(subnodeNext)
/**
* The grammar generator forbids a rule marked
* `ruleProps.isTermSequence` to produce multiple verbs without
* `ruleProps.gramProps`. Prevents merging two conjugative verb text
* objects, each of which can have an input `tense`, and passing the
* merged text array up to a parent node without knowing to which text
* object which input `tense` applies.
*/
return leftSub.ruleProps.tense || rightSub.ruleProps.tense
}
/**
* `subnode` is an insertion for a substitution. For example:
* `[contribute-to]` -> "work" (input) -> `[work]` `[on]`, text: `[ {contribute-verb-forms}, "to" ]`
*/
return leftSub.ruleProps.tense
}
/**
* Checks if `subnode`, which was passed to
* `createTermSequenceSubstitutionRuleProps()`, is ill-formed. If so, prints
* an error.
*
* @private
* @static
* @param {Object} subnode The `createTermSequenceSubstitutionRuleProps()`
* term sequence substitution subnode to inspect.
* @returns {boolean} Returns `true` if `subnode` is ill-formed, else
* `false`.
*/
function isIllFormedTermSequenceSubstitution(subnode) {
var subnodeRuleProps = subnode.ruleProps
if (!subnodeRuleProps.isTermSequence) {
util.logError('`createTermSequenceSubstitutionRuleProps()` invoked on subnode without `isTermSequence`:', subnode)
return true
}
if (!subnodeRuleProps.text) {
util.logError('`createTermSequenceSubstitutionRuleProps()` invoked on subnode without substitution text (should use `createTermSequenceRuleProps()`):', subnode)
return true
}
if (subnodeRuleProps.insertedSymIdx !== undefined) {
util.logError('`createTermSequenceSubstitutionRuleProps()` invoked on subnode with `insertedSymIdx` (should use `createTermSequenceInsertionRuleProps()`):', subnode)
return true
}
if (subnodeRuleProps.gramProps) {
util.logError('`gramProps` exists on term sequence substitution, which should have conjugated `text` during grammar generation and then discarded:', subnode)
return true
}
return false
}
/**
* Creates a new `ruleProps` for term sequence insertion `subnode` with the
* following properties:
* 1. terminal - The new `ruleProps` is marked terminal so that `pfsearch`
* uses the new `subnode.ruleProps` to generate display text and does not
* traverse its child nodes.
* 2. cost - Uses `cost`, the cumulative cost (including any deletion costs)
* of the subtree `subnode` produces, which will be inaccessible after
* assigning this new, terminal `ruleProps`
* 3. semantic - `subnode.ruleProps.semantic`, if defined.
* 4. text - Merges `text` values of the matched terminal rules this subnode's
* single child node produces with `subnode.ruleProps.text` according to
* `subnode.ruleProps.insertedSymIdx`.
* 5. tense - The tense of a matched verb terminal rule `subnode` produces,
* for which the associated text object remains unconjugated, to maintain
* tense if the parent rule of `subnode` has matching `acceptedTense`.
*
* For example:
* `[contribute-to]` -> `[contribute]`, text: "to"
* -> "contribute" (input), text: `{contribute-verb-forms}`
* -> text: `[ {contribute-verb-forms}, "to" ]` (merged text values)
*
* @private
* @static
* @param {Object} subnode The term sequence insertion subnode for which to
* generate a new, flattened, terminal `ruleProps`.
* @param {number} cost The cumulative cost of the subtree `subnode` produces.
* @returns {Object} Returns the new, flattened, terminal `ruleProps` for
* `subnode`.
*/
function createTermSequenceInsertionRuleProps(subnode, cost) {
if (isIllFormedTermSequenceInsertion(subnode)) {
throw new Error('Ill-formed term sequence insertion')
}
var subnodeRuleProps = subnode.ruleProps
var childSub = getChildSub(subnode)
/**
* Get the display text `childSub` produces, and conjugate with the
* grammatical properties specific to its RHS index, if any. For example,
* in order of conjugative property priority:
* • If `subnodeRuleProps.gramProps[0].acceptedTense` and input tense,
* `childSub.ruleProps.tense`, exist and match:
* rhs: [ `[like]` ],
* gramProps: { 0: { acceptedTense: "past" } },
* childSub.ruleProps.tense: "past",
* => "liked"
*
* • Else if `subnodeRuleProps.gramProps[0].form` exists:
* rhs: [ `[create]` ],
* gramProps: { 0: { form: "past" } },
* => "created"
*
* • Else if `subnodeRuleProps.insertedSymIdx` is 0 and
* `subnodeRuleProps.personNumber` exists; i.e., when the inserted `text`
* was generated from a nominative subject:
* rhs: [ `[have]` ],
* insertedSymIdx: 0,
* personNumber: "oneSg",
* => "have"
*/
var parentGramProps = subnodeRuleProps.gramProps
var insertedPersonNumber = subnodeRuleProps.insertedSymIdx === 0 && subnodeRuleProps.personNumber
var childSubText = getSubnodeText(childSub, parentGramProps && parentGramProps[0], insertedPersonNumber)
// Excludes `insertedSymIdx` because this flattens the term sequence into a
// terminal node, removing the need to traverse the node's children.
return {
// The cumulative cost (including any deletion costs) of the subtree
// `subnode` produces.
cost: cost,
// Merge insertion `text` with matched terminal rule `text` according to
// `insertedSymIdx`.
text: subnodeRuleProps.insertedSymIdx === 1
? grammarUtil.mergeTextPair(childSubText, subnodeRuleProps.text)
: grammarUtil.mergeTextPair(subnodeRuleProps.text, childSubText),
/**
* Save the input tense of any verb terminal rule `subnode` produces that
* this rule can not conjugate, to maintain optional tense if the parent
* rule of `subnode` has matching `acceptedTense`. For example:
* `[contribute-to]` -> `[contribute]`, text: "to"
* -> "contributed" (input), text: `{contribute-verb-forms}`
* -> text: "contributed to" (merged text values)
*
* If `childSubText` remains unconjugated, save input tense from
* `childSub`, otherwise its `tense` was used in its conjugation.
*/
tense: childSubText.constructor !== String && childSub.ruleProps.tense,
// Exclude `semanticIsReduced` and `secondRHSCanProduceSemantic`, which
// are specific to nonterminal nodes.
semantic: subnodeRuleProps.semantic,
/**
* Do not save `subnodeRuleProps.personNumber` from a term sequence
* insertion because it has yet to needed.
*
* Were `personNumber` needed, would likely be when `insertedSymIdx` is
* 1. If 0, then the `personNumber` value would have been from the
* inserted branch and used in conjugation above of the non-inserted
* portion, after which the subtree to which that `personNumber` applied
* would be complete.
*
* Ideally, however, should always save `personNumber` because the
* property could have been assigned to the insertion's original base
* rule instead of derived from its inserted portion, which is
* indistinguishable. If the former, then the property applies beyond
* this subtree.
*/
// personNumber: subnodeRuleProps.insertedSymIdx === 1 && subnodeRuleProps.personNumber,
}
}
/**
* Checks if `subnode`, which was passed to
* `createTermSequenceInsertionRuleProps()`, is ill-formed. If so, prints an
* error.
*
* @private
* @static
* @param {Object} subnode The `createTermSequenceInsertionRuleProps()` term
* sequence insertion subnode to inspect.
* @returns {boolean} Returns `true` if `subnode` is ill-formed, else
* `false`.
*/
function isIllFormedTermSequenceInsertion(subnode) {
var subnodeRuleProps = subnode.ruleProps
if (!subnodeRuleProps.isTermSequence) {
util.logError('`createTermSequenceInsertionRuleProps()` invoked on subnode without `isTermSequence`:', subnode)
return true
}
if (!subnodeRuleProps.text) {
util.logError('`createTermSequenceInsertionRuleProps()` invoked on subnode without insertion text (should use `createTermSequenceRuleProps()`):', subnode)
return true
}
if (subnodeRuleProps.insertedSymIdx === undefined) {
util.logError('`createTermSequenceInsertionRuleProps()` invoked on subnode without `insertedSymIdx` (should use `createTermSequenceSubstitutionRuleProps()`):', subnode)
return true
}
// Check for instances of `personNumber` on term sequence insertions when
// `insertedSymIdx` is 1. This has not been seen and consequently
// `createTermSequenceInsertionRuleProps()` does not currently check for it.
if (subnodeRuleProps.personNumber && subnodeRuleProps.insertedSymIdx === 1) {
util.logError('`personNumber` found on term sequence insertion with `insertedSymIdx` of 1:', subnode)
return true
}
// Do not check because insertions with `[blank-inserted]` are binary nodes.
// if (subnode.next) {
// util.logError('Insertion subnode is binary:')
// util.logObjectAtDepth(subnode, 3)
// return true
// }
return false
}
/**
* Gets the child subnode the term sequence `subnode` produces (i.e., in
* `subnode.node.subs`).
*
* `subnode` almost always produces a single child subnode, but in the rare
* cases (documented in `getCheapestChildSub()`) of multiple child subnodes,
* the term sequence is ambiguous (because semantically identical) and the
* cheapest must be chosen.
*
* @private
* @static
* @param {Object} subnode The term sequence subnode.
* @returns {Object} Returns the subnode `subnode` produces.
*/
function getChildSub(subnode) {
var childSubs = subnode.node.subs
var childSub = childSubs.length === 1 ? childSubs[0] : getCheapestChildSub(subnode)
// Check for unsupported properties on term sequence child subnodes.
if (isIllFormedChildSubnode(childSub, subnode)) {
throw new Error('Ill-formed term sequence child subnode')
}
return childSub
}
/**
* Gets the cheapest child subnode the ambiguous term sequence `subnode`
* produces (i.e., in `subnode.node.subs`).
*
* For use by `getChildSub()` on term sequences that produce multiple child
* subnodes. Such subnodes are ambiguous because term sequences lack
* semantics, which makes them semantically identical. Only occurs in rare
* cases, documented below.
*
* @private
* @static
* @param {Object} subnode The ambiguous term sequence subnode.
* @returns {Object} Returns the cheapest child subnode `subnode` produces.
*/
function getCheapestChildSub(subnode) {
var childSubs = subnode.node.subs
var childSubsLen = childSubs.length
if (childSubsLen < 2) {
util.logError('`getCheapestChildSub()` invoked on (unambiguous) subnode that does not have multiple child subnodes:', subnode)
throw new Error('Ill-formed ambiguous term sequence')
}
/**
* `subnode` produces multiples subnodes. Choose the cheapest.
*
* This only occurs when the term sequence `subnode` is ambiguous, because
* term sequences can not produce semantics; they only differ by cost and
* display text. Ideally, term sequence subnodes are never ambiguous, but
* the overhead to avoid every cause of such ambiguity outweighs the cost of
* its rarity.
*
* The following are causes of term sequence ambiguity. Some can and should
* be avoided, and others can not:
* 1. Ambiguous grammar rules. For example, consider the term sequence `X`:
* X -> "a"
* X -> Y -> "a"
*
* When "a" is matched in input, the term sequence `X` will have two
* subnodes. The grammar design causes this, and the grammar generator
* should prevent this.
*
* 2. Terminal symbol deletions, which can occur in two way:
* A. Deleltables defined in the grammar.
* B. After `Parser` fails to reach the start node or `pfsearch` fails to
* generate legal parse trees (due to contradictory semantics) on the
* initial parse, as a last resort, `Parser` reparses the input query
* with all tokens marked deletable.
*
* These deletions enable the following possible instances of term
* sequence ambiguity:
* A. Consider the binary RHS `X1 X2`, where `X2` produces a term
* sequence:
* X1, X2 -> Y -> A -> "a"
* -> B -> "b"
* X1, X2 -> Y -> A, text: "b" -> "a"
*
* The rule `X2 -> Y` is a term sequence because every RHS symbol
* (i.e., just `Y`) produces a term sequence. `calcCostHeuristics`
* flattens `Y` by merging the `text` of the rules it produces, and
* assigning the text to the rule `X2 -> Y` with a new, terminal
* `ruleProps`.
*
* As shown, the term sequence `Y` recognizes the input "a b" and "a"
* with insertion text "b". Provided the input "a b", and with "b"
* marked deletable, `Y` will have two matches for the same token span:
* "a b", "a <b>" (where "b" is deleted).
*
* The grammar generator should prevent this by forbidding unary term
* sequences (but, might be difficult for insertion rules within nested
* term sequences). This example would not occur if `Y` replaces `X2`
* in the initial binary rule.
*
* B. Consider the following input:
* "followers of my mine"
*
* Consider a terminal rule set produces both "my" and "mine" (though
* both have display text "my"), and all tokens are marked deletable.
* This yields a node for the term sequence with two subnodes, each of
* which spans the last two input tokens, but contains different
* terminal rules: "my <mine>", "<my> mine". This is unavoidable in the
* grammar.
*
* C. Consider the following input:
* "followers of mine mine"
*
* "mine" and "mine" will obviously be the same terminal rule. With
* "mine" marked deletable, there will be two subnodes for the same LHS
* symbol, spanning the last two input tokens: "mine <mine>", "<mine>
* mine". This is unavoidable in the grammar.
*
* A terminal node table in `Parser.prototype.addTermRuleNodes()` could
* detect these duplicate instances of the same LHS symbol over the
* same span, similar to the nonterminal node table used in
* `Parser.prototype.addSub()`, but the overhead is too great for such
* rarity (even if the table is only used during the second parse).
*
*
* `calcHeuristicCosts` must choose the cheapest subnode for ambiguous term
* sequence matches, as opposed to when `Parser.prototype.addSub()` first
* adds these subnodes:
* 1. These cost comparisons will not always work in
* `Parser.prototype.addSub()` because the comparisons require completing
* all reductions of a subnode to know its cheapest subnode (i.e., a
* nested term sequence) before reducing with its parent term sequence
* node. However, `Parser` may reduce a parent node with a given node
* before reducing all of the latter node's child nodes. Moreover,
* `Parser` can not determine if all reductions for a node are complete
* until parsing completes. Hence, a comparison at that state might have
* an inaccurate minimum cost. For example, consider the term sequence
* `X`:
* X -> A -> "z", cost: 0.5
* X -> B -> "z", cost: 1
* B -> C -> "z", cost: 0
*
* Suppose `Parser` compares the reduction for `X -> B -> "z"` against
* `X -> A -> "z"`, finds the former more expensive, and discards it.
* Later, `Parser` reduces the third rule sequence above,
* `B -> C -> "z"`, which makes the reduction `X -> B` cheaper than
* `X -> A`; however, it is too late because the reduction `X -> B` was
* already discarded and will not reappear.
*
* 2. Though `Parser.prototype.addSub()` could catch most instances of term
* sequence ambiguity (i.e., #1 is uncommon), it would be inefficient to
* search for the cheapest term sequence for parse nodes that might never
* reach the start node. In contrast, every term sequence subnode
* comparison in `calcHeuristicCosts` had to have reached the start node.
*/
/**
* Check the ambiguous term sequence includes a terminal symbol deletion:
* spans multiple input tokens and has the minimum deletion cost (1 for
* grammar-defined deletables).
* • Check every child subnode has the minimum deletion cost, instead of
* only checking `subnode.node.minCost`, to avoid halting for a
* grammar-defined stop-word that is ambiguous with a term marked
* `Parser` marked as deletable on a reparse.
*
* Does not catch all grammar-induced instances of term sequence
* ambiguity. E.g., a multi-token term sequence with high non-edit rule
* costs will evade this check. Though, unlikely because term sequences
* lack semantics.
* • Can be tracked absolutely with `ruleProps.hasDeletion`, but it is
* best to avoid the complexity for such a rare error which ideally is
* caught in grammar generation.
*/
if (subnode.size === 1 || childSubs.every(childSub => childSub.ruleProps.cost < 1)) {
util.logError('Term sequence ambiguity caused by ill-formed grammar (not deletion):', subnode)
util.log.apply(null, [ '\nAmbiguous child subnodes:' ].concat(childSubs))
throw new Error('Ill-formed term sequence')
}
/**
* Choose the cheapest subnode for the ambiguous term sequence.
*
* The cheapest subnode is the same child node that set `subnode.minCost`
* in `calcHeuristicCosts()`.
*
* All subnodes here likely have identical display text (and input tense)
* by the nature of the possible sources of this ambiguity, documented
* above.
*/
var cheapestChildSub = childSubs[0]
var cheapestChildSubCost = cheapestChildSub.ruleProps.cost
for (var s = 1; s < childSubsLen; ++s) {
var childSub = childSubs[s]
var childSubCost = childSub.ruleProps.cost
/**
* Compare `childSub.ruleProps.cost`, not `childSub.node.minCost`,
* because `calcHeuristicCosts` already traversed and flattened
* `subnode.node.subs`, merging their `minCost` with their
* `ruleProps.cost` on their new, terminal `ruleProps`. Also,
* `ruleProps.cost` of binary nodes includes the cost of both subnodes.
*/
if (childSubCost < cheapestChildSubCost) {
cheapestChildSub = childSub
cheapestChildSubCost = childSubCost
}
}
return cheapestChildSub
}
/**
* Checks if term sequence child subnode `childSub` has unsupported
* `ruleProps` properties. If so, prints an error.
*
* These properties have yet to be seen on child subnodes, though would be
* easy to support.
*
* @private
* @static
* @param {Object} childSub The term sequence child subnode to inspect.
* @param {Object} subnode The term sequence subnode that produces
* `childSub`.
* @returns {boolean} Returns `true` if `childSub` is ill-formed, else
* `false`.
*/
function isIllFormedChildSubnode(childSub, subnode) {
var childRuleProps = childSub.ruleProps
var forbiddenChildProps = [ 'personNumber', 'anaphoraPersonNumber', 'semantic' ]
for (var p = 0; p < 3; ++p) {
var propName = forbiddenChildProps[p]
if (childRuleProps[propName]) {
util.logError('Term sequence child subnode has forbidden property `', propName, '`:')
util.log(' subnode:', subnode)
util.log(' child:', childSub)
return true
}
}
return false
}
/**
* Gets the display text `subnode` produces (i.e., `subnode.ruleProps.text`).
*
* If `parentGramProps` is defined and `subnode.ruleProps.text` is a
* conjugative display text object or array, returns the conjugated text
* string.
* • If `parentGramProps.acceptedTense` and input tense,
* `subnode.ruleProps.tense`, exist and match, conjugates the verb with the
* optional tense accepted when input. If `subnode` itself has nested term
* sequences (i.e., is multi-token), then `subnode.ruleProps.tense` is an
* input tense that has propagated upward. For example:
* subnode: `[like]`,
* subnode.ruleProps.tense: "past",
* parentGramProps: { acceptedTense: "past" },
* => "liked"
* • Else if `parentGramProps.form` exists, conjugates the verb. For example:
* subnode `[create]`,
* parentGramProps: { form: "past" },
* => "created"
* • `flattenTermSequence` does not check if a parent node has `gramProps`