-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathNSymbol.js
1774 lines (1632 loc) · 70.5 KB
/
NSymbol.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
var util = require('../../util/util')
var g = require('./grammar')
var grammarUtil = require('./grammarUtil')
var semantic = require('./semantic')
// Instruct `util.getModuleCallerLocation()` to skip the `NSymbol` module when
// searching the call stack for `NSymbol` instantiation file paths used in
// error reporting.
util.skipFileInLocationRetrieval()
/**
 * The map of the grammar's nonterminal symbols to rule arrays.
 *
 * Keyed by `NSymbol` name. The `NSymbol` constructor adds an entry for each
 * new symbol: an empty array that is also assigned to the instance's `rules`
 * property, so both refer to the same array.
 *
 * Note: Assigning a property to `NSymbol` here, above the `NSymbol` function
 * declaration, relies on function declaration hoisting.
 *
 * @type {Object.<string, Object[]>}
 */
NSymbol._ruleSets = {}
/**
 * The map of `NSymbol` names to definition lines (file-path + line-number).
 * For use in error messages.
 *
 * The `NSymbol` constructor also passes this map to
 * `grammarUtil.isDuplicateName()` to reject a name that is already defined.
 *
 * @type {Object.<string, string>}
 */
NSymbol._defLines = {}
/**
 * The map of `NSymbol` names to the respective `NSymbol` instance.
 *
 * The `NSymbol` constructor registers each new instance here under its name.
 *
 * @private
 * @type {Object.<string, NSymbol>}
 */
var _NSymbols = {}
/**
 * Instantiates a nonterminal symbol and registers it with the grammar.
 *
 * May be invoked with or without `new`; without `new`, the constructor
 * re-invokes itself on a fresh instance and returns that instance.
 *
 * Throws an exception if a nonterminal symbol with the same (formatted) name
 * already exists.
 *
 * @constructor
 * @param {...string} [nameTokens] The tokens to hyphenate for the new
 * nonterminal symbol name.
 */
function NSymbol() {
  // Support invocation as a plain function (i.e., without `new`).
  if (this instanceof NSymbol === false) {
    var instance = Object.create(NSymbol.prototype)
    NSymbol.apply(instance, arguments)
    return instance
  }

  // Join the provided tokens with hyphens, format the result, and enclose it
  // in brackets to form the symbol name.
  var hyphenatedTokens = grammarUtil.hyphenate.apply(null, arguments)
  var symbolName = '[' + grammarUtil.formatStringForName(hyphenatedTokens) + ']'
  this.name = symbolName

  // Reject the name if `NSymbol._defLines` already contains it. Must check
  // before recording this symbol's definition line below.
  if (grammarUtil.isDuplicateName(symbolName, NSymbol._defLines, 'nonterminal symbol')) {
    throw new Error('Duplicate nonterminal symbol name')
  }

  // Record where this symbol was instantiated, for error reporting.
  NSymbol._defLines[symbolName] = util.getModuleCallerLocation()

  // The rules this symbol produces. The same array is shared by the instance
  // and the grammar-wide `NSymbol._ruleSets` map.
  var ruleSet = []
  NSymbol._ruleSets[symbolName] = ruleSet
  this.rules = ruleSet

  _NSymbols[symbolName] = this
}
/**
 * Creates a rule from `options` and appends it to this `NSymbol`'s rules,
 * making this `NSymbol` the rule's LHS symbol.
 *
 * Throws an exception if this symbol is a completed terminal rule set
 * (`isTermSet`), is a binary symbol (`isBinarySymbol`), or already produces a
 * rule with an identical `rhs`.
 *
 * @memberOf NSymbol
 * @param {Object} options The options object.
 * @param {Object} [options.isTerminal=false] Specify the rule is terminal.
 * @returns {NSymbol} Returns this `NSymbol` instance.
 */
NSymbol.prototype.addRule = function (options) {
  // Completed terminal rule sets accept no further rules.
  if (this.isTermSet) {
    util.logErrorAndPath('Attempting to add a rule to a completed terminal rule set:', util.stylize(this.name), options)
    throw new Error('Adding rule to completed terminal rule set')
  }

  // Binary symbols are limited to their single binary rule.
  if (this.isBinarySymbol) {
    util.logErrorAndPath('Attempting to add a rule to a binary symbol (i.e., limited to a single binary rule):', util.stylize(this.name), options)
    throw new Error('Adding rule to binary symbol')
  }

  // Build the rule with the constructor method matching its kind.
  var newRule
  if (options.isTerminal) {
    newRule = this._newTerminalRule(options)
  } else {
    newRule = this._newNonterminalRule(options)
  }

  // Reject the rule if this symbol already produces the same `rhs`.
  var hasSameRHS = function (existingRule) {
    return util.arraysEqual(existingRule.rhs, newRule.rhs)
  }
  if (this.rules.some(hasSameRHS)) {
    util.logErrorAndPath('Duplicate rule:', grammarUtil.stringifyRule(this.name, newRule))
    throw new Error('Duplicate rule')
  }

  // Record where the rule was defined, for error console messages. Excluded
  // from output grammar.
  newRule.line = util.getModuleCallerLocation()

  this.rules.push(newRule)
  return this
}
/**
 * The nonterminal RHS `NSymbol` wrapper parameterization, for use in
 * `nonterminalRuleSchema.rhs` with `NSymbol.prototype._newNonterminalRule()`,
 * to specify properties specific to the wrapped RHS `symbol`.
 *
 * `isIllFormedNonterminalRuleOptions()` validates each plain-object item in a
 * rule's `rhs` against this schema with `util.illFormedOpts()`.
 *
 * `acceptedTense` is for use when tense is semantically meaningless. For
 * example, consider the following semantically identical queries:
 *   past: "repos I liked"
 *   present: "repos I like"
 * Both forms are accepted when input, but an insertion for the verb "like"
 * inserts the verb in present tense.
 *
 * The following `grammaticalForm` values are supported:
 * • nom - The nominative case form, which conjugates pronouns used as the
 *   subject of a verb, created by `g.newPronoun()`. For example:
 *     "repos `[1-sg]` created" -> "repos I created"
 * • obj - The objective case form, which conjugates pronouns used as the
 *   object of a verb, created by `g.newPronoun()`. For example:
 *     "repos created by `[1-sg]`" -> "repos created by me"
 * • past - The simple past tense form, which conjugates verbs created by
 *   `g.newVerb()` and `g.newTenseVerb()`. For example:
 *     "repos `[verb-like]` by me" -> "repos liked by me"
 * • infinitive - The bare infinitive form, which uses the present plural form
 *   that `personNumber` of 'pl' uses, which conjugates verbs created by
 *   `g.newVerb()`. For example:
 *     "people who `[have]` been ..." -> "people who have been ..."
 *     "people who `[verb-like]` ..." -> "people who like ..."
 *     "repos I `[verb-do]` not ..." -> "repos I do not ..."
 * • participle - The participle form, which conjugates verbs created in
 *   `beVerb`. For example, used in compound verb forms:
 *     "issues I have `[be-past]` ..." -> "issues I have been ..."
 *
 * `pfsearch` uses `acceptedTense` and `grammaticalForm` to conjugate `text`
 * that `symbol` immediately produces (i.e., `symbol` is a term sequence), but
 * does not conjugate any subsequent rules.
 * • This limitation of only conjugating immediate child nodes and no further
 *   enforces a grammar design that conjugates as many insertion rules as
 *   possible during grammar generation, as opposed to leaving conjugation to
 *   parse-time by positioning the property higher in the parse tree.
 *
 * If both `acceptedTense` and `grammaticalForm` are defined, `acceptedTense`
 * has priority when conjugating and falls back to `grammaticalForm` if input
 * `tense` does not match.
 *
 * `noText` instructs `flattenTermSequence` to discard the display text
 * `symbol` and its descendants produce. Requires `symbol` be a term sequence.
 * • Directs term sequence `symbol` to behave as a stop sequence when matched
 *   in input by effectively removing the matched terminal symbols from input.
 * • Requires `noInsert` also be `true` because an insertion of `symbol` would
 *   yield no text, and because it is a term sequence, no semantics. Hence, it
 *   would be identical to the rule `isOptional` creates, but with the
 *   insertion cost. If so desired, use `isOptional`.
 *
 * `isOptional` instructs `NSymbol.prototype._newNonterminalRule()` to create
 * a new unary rule with the other RHS symbol with which this
 * `RHSSymbolWrapper` is paired and all of the rule's properties not specific
 * to this symbol (i.e., the properties in this `RHSSymbolWrapper` instance).
 * • This is a shortcut for creating a rule with the same properties as the
 *   rule that owns the `RHSSymbolWrapper` instance with `isOptional`, but
 *   needs only match the other RHS symbol.
 * • Apt for stop sequences, to recognize the rule whether or not the stop
 *   sequence RHS symbol is matched.
 *
 * @typedef {Object} RHSSymbolWrapper
 * @property {NSymbol|(NSymbol|RHSSymbolWrapper|Array)[]} symbol The RHS
 * nonterminal symbol.
 * @property {string} [acceptedTense] The grammatical tense form for which the
 * verb `symbol`, created with `g.newVerb()` or `g.newTenseVerb()`, is accepted
 * when input in that `tense` (defined on terminal rules), but the tense verb
 * form is not enforced when not input in that tense nor for insertion rules
 * that use `symbol`.
 * @property {string} [grammaticalForm] The grammatical form to which to
 * conjugate the term sequence `symbol`.
 * @property {boolean} [noInsert] Specify `createInsertionRules` can not create
 * insertion rules using `symbol` and the rule in which this `RHSSymbolWrapper`
 * is provided.
 * @property {boolean} [noText] Specify discarding the display text `symbol`
 * and its descendants produce. Requires `symbol` be a term sequence and
 * `noInsert` be `true`.
 * @property {boolean} [isOptional] Specify creating a version of this symbol's
 * nonterminal rule without this RHS symbol. Requires this rule be binary.
 */
var rhsSymbolWrapperSchema = {
symbol: { type: [ NSymbol, Array ], required: true },
acceptedTense: { values: [ 'past' ] },
grammaticalForm: { values: [ 'nom', 'obj', 'past', 'infinitive', 'participle' ] },
noInsert: Boolean,
noText: Boolean,
isOptional: Boolean,
}
/**
 * Creates a new nonterminal rule to assign to this `NSymbol`.
 *
 * This comment documents `NSymbol.prototype._newNonterminalRule()`. The
 * `nonterminalRuleSchema` object that follows is the schema against which
 * that method validates its `options` argument with `util.illFormedOpts()`.
 *
 * Each item in `options.rhs` must be one of the following:
 * 1. An `NSymbol` instance.
 * 2. An object of the form `RHSSymbolWrapper`.
 * 3. A nested ordered pair containing any combination of #1 or #2 from which to
 *    recursively create a new binary symbol and rule.
 *    • If an existing binary symbol exists that produces the provided ordered
 *      pair, that symbol is used instead of creating a duplicate.
 * 4. A string name of an existing `NSymbol`. For use when passing an existing
 *    rule's `rhs` for a new rule, as in `user.js`.
 *
 * @private
 * @memberOf NSymbol
 * @param {Object} options The options object.
 * @param {(NSymbol|RHSSymbolWrapper|Array|string)[]} options.rhs The RHS
 * symbols this rule produces, as documented above.
 * @param {boolean} [options.noInsert] Specify `createInsertionRules` can not
 * create insertion rules that insert `options.rhs` entirely, but can create
 * rules that insert one of two symbols in `options.rhs`.
 * @param {number} [options.transpositionCost] Specify `createEditRules` can
 * create a transposition rule with this cost penalty that recognizes the
 * reverse order of the ordered pair `options.rhs` in input, and corrects the
 * order of the display text the two RHS symbols produce when parsing.
 * @param {Object[]} [options.semantic] The semantic from which a semantic tree
 * is constructed in association with parse trees constructed with this rule.
 * @param {string} [options.personNumber] The grammatical person-number for
 * which to conjugate verbs that either `options.rhs` produces or follow this
 * rule within the same parse subtree. One of 'oneSg', 'threeSg', or 'pl'.
 * @param {string} [options.anaphoraPersonNumber] The grammatical person-number
 * for anaphoric rules with which to match and copy an antecedent semantic of
 * the same person-number. E.g., "his|her" refers to semantics of
 * third-person-singular representations. One of 'threeSg' or 'threePl'.
 * @param {Object|string|(Object|string)[]} [options.text] The substitution
 * display text that is used in place of any text `options.rhs` and its
 * ancestors generate. For use when every symbol in `options.rhs` is a term
 * sequence.
 * @param {number} [options.costPenalty] The rule's cost penalty.
 * @returns {Object} Returns the new nonterminal rule.
 */
var nonterminalRuleSchema = {
rhs: { type: Array, arrayType: [ NSymbol, Object, Array, String ], required: true },
noInsert: Boolean,
transpositionCost: Number,
semantic: { type: Array, arrayType: Object },
personNumber: { values: [ 'oneSg', 'threeSg', 'pl' ] },
anaphoraPersonNumber: { values: [ 'threeSg', 'threePl' ] },
text: [ String, Object, Array ],
costPenalty: Number,
}
// Builds and returns a nonterminal rule object from `options`. Throws an
// exception if `options` is ill-formed. Does not itself append the returned
// rule to `this.rules`; however, for RHS symbols flagged `isOptional`, it adds
// the corresponding unary rules to this `NSymbol` as a side effect.
NSymbol.prototype._newNonterminalRule = function (options) {
  var failsSchema = util.illFormedOpts(nonterminalRuleSchema, options)
  if (failsSchema || isIllFormedNonterminalRuleOptions(options)) {
    throw new Error('Ill-formed nonterminal rule')
  }

  // Replace nested ordered pairs in `options.rhs` with binary symbols,
  // recursively. Mutates `options.rhs` in place.
  flattenNonterminalRHS(options.rhs)

  /**
   * For each `RHSSymbolWrapper` in a binary `options.rhs` with `isOptional`
   * set, add a unary nonterminal rule to this `NSymbol` that has only the
   * other RHS symbol and the `options` properties not specific to the
   * optional symbol.
   *
   * Skipped for dry-runs via `getExistingBinaryNSymbol()`, which builds a
   * temporary nonterminal rule for comparison to existing rules by binding
   * this method to `NSymbol.prototype` instead of an existing `NSymbol`.
   */
  var isDryRun = this === NSymbol.prototype
  if (!isDryRun) {
    this._addOptionalRHSSymbolRules(options)
  }

  var rhs = options.rhs
  var newRule = {}

  // The nonterminal symbol names of `options.rhs`, for writing grammar to
  // output.
  newRule.rhs = rhs.map(nonterminalRHSSymbolToName)

  // The `rhs` indexes for which `createInsertionRules` must not create
  // insertion rules.
  newRule.noInsertionIndexes = getRHSNoInsertionIndexes(rhs)

  // The `rhs` indexes of term sequences whose `text` `flattenTermSequence`
  // discards when creating a new `ruleProps` for `newRule`.
  newRule.rhsNoTextIndexes = getRHSNoTextIndexes(rhs)

  /**
   * The grammatical properties with which `pfsearch` conjugates the `text`
   * objects that the term sequences in `rhs` produce; i.e., `ruleProps.text`
   * of immediate child nodes (post flattening via `flattenTermSequence`).
   *
   * A map of RHS index to the grammatical properties for the term sequence
   * at that index. All conjugation properties for an index live in a single
   * object so `pfsearch` can check once whether a `ruleProps` instance has
   * any conjugation properties at all, instead of checking each possible
   * property separately, before attempting to conjugate the child node's
   * display text.
   *
   * If `gramProps` lacks any properties, `removeTempRulesAndProps` removes
   * the empty object at the end of grammar generation.
   *
   * Note: A single terminal rule set with dynamic grammatical conjugation is
   * much preferable to separate rule sets per grammatical case (depending on
   * the rule) with the same substitution/synonym terminal symbols. The
   * overhead `Parser` endures for the larger state table (from the
   * additional rules) is far greater than the overhead for additional
   * `pfsearch` conjugation.
   */
  newRule.gramProps = getGramProps(rhs)

  // Lets `createEditRules` create a transposition rule with this cost
  // penalty, which recognizes the ordered pair `options.rhs` in reverse order
  // in input and corrects the order when parsing (i.e., swaps the order of
  // the display text the two RHS symbols produce).
  newRule.transpositionCost = options.transpositionCost

  // Forbids inserting this rule's entire RHS, while still permitting
  // insertion rules created using one of its RHS symbols.
  newRule.noInsert = options.noInsert

  newRule.personNumber = options.personNumber
  newRule.anaphoraPersonNumber = options.anaphoraPersonNumber

  /**
   * Assign the term sequence properties to `newRule`:
   * • {boolean} [newRule.isTermSequence] - `true` if `newRule.rhs` produces
   *   only term sequences, which instructs `flattenTermSequence` to merge the
   *   `text` values `newRule.rhs` produces, assign the merged `text` to this
   *   rule's parse node, and prevent `pfsearch` from traversing past this
   *   rule's node.
   * • {Object|string|(Object|string)[]} [newRule.text] - If
   *   `newRule.isTermSequence` is `true`, `options.text` is assigned for
   *   nonterminal substitutions, which instructs `flattenTermSequence` to use
   *   this rule's display text instead of the text `newRule.rhs` produces for
   *   the terminal node it creates for this rule.
   * • {number[]} newRule.rhsTermSequenceIndexes - If `newRule.isTermSequence`
   *   is not truthy because not every RHS symbol is a term sequence, the
   *   `newRule.rhs` indexes of which RHS symbol is a term sequence, if
   *   either. Only possible if `newRule.rhs` is binary. For use by
   *   `flattenTermSequence` to flatten a binary nonterminal node with one
   *   term sequence into an insertion node with the term sequence's text as
   *   the insertion text.
   */
  assignTermSequence(newRule, options)

  /**
   * Assign the semantic and cost properties to `newRule`:
   * • {Object} [newRule.semantic] - The semantic used in semantic trees that
   *   correspond to parse trees that contain this rule.
   * • {boolean} [newRule.semanticIsReduced] - Whether `newRule.semantic` is
   *   reduced, else the semantic is to accept other semantics as arguments.
   * • {number} newRule.cost - The rule cost, which includes the cost of
   *   `newRule.semantic` and `options.costPenalty`, if any.
   *
   * Must follow `assignTermSequence()` because this function checks for
   * `newRule` properties which `assignTermSequence()` assigns.
   */
  assignSemanticAndCost(newRule, options)

  return newRule
}
/**
 * Reports whether `ruleOptions`, which was passed to
 * `NSymbol.prototype._newNonterminalRule()` or `NSymbol.newBinaryRule()`,
 * is ill-formed. Prints an error if so.
 *
 * `ruleOptions` is ill-formed if any of the following hold, checked in this
 * order:
 * 1. A plain-object item of `ruleOptions.rhs` fails `rhsSymbolWrapperSchema`.
 * 2. `ruleOptions.rhs` has more than two symbols.
 * 3. `ruleOptions.transpositionCost` is defined and `ruleOptions.rhs` does
 *    not have exactly two symbols.
 *
 * @private
 * @static
 * @param {Object} ruleOptions The nonterminal rule ruleOptions object to
 * inspect.
 * @returns {boolean} Returns `true` if `ruleOptions` is ill-formed, else
 * `false`.
 */
function isIllFormedNonterminalRuleOptions(ruleOptions) {
  var rhsSymbols = ruleOptions.rhs
  var symbolCount = rhsSymbols.length

  // Validate every `RHSSymbolWrapper` (i.e., plain object) in the RHS.
  for (var rhsSym of rhsSymbols) {
    var isWrapper = rhsSym.constructor === Object
    if (isWrapper && util.illFormedOpts(rhsSymbolWrapperSchema, rhsSym)) {
      return true
    }
  }

  // Nonterminal rules are at most binary.
  if (symbolCount > 2) {
    util.logErrorAndPath('Nonterminal rule has > 2 `rhs` symbols:', ruleOptions)
    return true
  }

  // Transposition swaps an ordered pair, hence requires exactly two symbols.
  var hasTranspositionCost = ruleOptions.transpositionCost !== undefined
  if (hasTranspositionCost && symbolCount !== 2) {
    util.logErrorAndPath('Nonterminal rule with `transpositionCost` does not have two `rhs` symbols:', ruleOptions)
    return true
  }

  return false
}
/**
 * Replaces every nested ordered pair in `rhs` with a binary symbol,
 * recursively (via `NSymbol.newBinaryRule()`).
 *
 * Note: This function mutates `rhs`, and mutates any `RHSSymbolWrapper` in
 * `rhs` whose `symbol` is an ordered pair.
 *
 * @private
 * @static
 * @param {(NSymbol|RHSSymbolWrapper|Array|string)[]} rhs The nonterminal
 * RHS to flatten.
 * @returns {rhs} Returns `rhs` flattened.
 */
function flattenNonterminalRHS(rhs) {
  var symbolCount = rhs.length
  var idx = 0

  while (idx < symbolCount) {
    var item = rhs[idx]
    var itemType = item.constructor

    /**
     * An array is an ordered pair of symbols: pass it to
     * `NSymbol.newBinaryRule()` to get a binary `NSymbol` in its place.
     *
     * The second argument, `true`, suppresses the `NSymbol.newBinaryRule()`
     * warning for a duplicate binary rule definition (in which case the
     * existing, identical binary rule is used). The nested-pair shorthand is
     * limited to RHS symbols, which suggests a duplicate definition is no
     * accident but simply an extension of the convenience the shorthand
     * offers.
     */
    if (itemType === Array) {
      // A bare ordered pair: replace the array item itself.
      rhs[idx] = NSymbol.newBinaryRule({ rhs: item }, true)
    } else if (itemType === Object) {
      // An `RHSSymbolWrapper`: replace its wrapped ordered pair, if any.
      var wrappedSymbol = item.symbol
      if (wrappedSymbol.constructor === Array) {
        item.symbol = NSymbol.newBinaryRule({ rhs: wrappedSymbol }, true)
      }
    }

    ++idx
  }

  return rhs
}
/**
 * Iterates through binary `options.rhs`, and for `RHSSymbolWrapper`
 * instances where `RHSSymbolWrapper.isOptional` is `true`, adds a new unary
 * nonterminal rule to this `NSymbol` with only the other RHS symbol and the
 * `options` properties not specific to the optional symbol.
 *
 * Adds the new unary rule(s) before
 * `NSymbol.prototype._newNonterminalRule(options)`, which invokes this
 * method, adds the rule it will create from `options`. This order is
 * immaterial.
 *
 * Invoke this method from `NSymbol.prototype._newNonterminalRule()` after
 * flattening nested ordered pairs in RHS with `flattenNonterminalRHS()`.
 *
 * Throws an exception if `isOptional` is used in a non-binary rule, or if the
 * resulting unary rule would be recursive (i.e., the other RHS symbol is this
 * `NSymbol`, whether provided as an `NSymbol`, wrapped in an
 * `RHSSymbolWrapper`, or referenced by its string name).
 *
 * @memberOf NSymbol
 * @param {Object} options The `NSymbol.prototype._newNonterminalRule()`
 * options object.
 * @returns {NSymbol} Returns this `NSymbol` instance.
 */
NSymbol.prototype._addOptionalRHSSymbolRules = function (options) {
  var rhs = options.rhs

  for (var rhsIndex = 0, rhsLen = rhs.length; rhsIndex < rhsLen; ++rhsIndex) {
    var rhsSym = rhs[rhsIndex]

    // Only `RHSSymbolWrapper` instances can be optional.
    if (rhsSym.constructor !== Object || !rhsSym.isOptional) {
      continue
    }

    // Ensure `RHSSymbolWrapper.isOptional` is only used with binary rules.
    if (rhsLen !== 2) {
      util.logErrorAndPath('RHS symbol with `isOptional` used with non-binary rule:', rhsSym, options)
      throw new Error('Ill-formed nonterminal rule')
    }

    // The other symbol in the binary RHS: index 1 if `rhsIndex` is 0, else 0.
    var otherRHSSym = rhs[Number(!rhsIndex)]

    // Prevent creating unary, recursive rules. Unwrap `otherRHSSym` if it is
    // an `RHSSymbolWrapper`, and also compare by name for RHS symbols
    // provided as a string name; otherwise, a wrapped or string-named
    // reference to this `NSymbol` would escape this check.
    var otherSymbol = otherRHSSym.constructor === Object ? otherRHSSym.symbol : otherRHSSym
    if (otherSymbol === this || otherSymbol === this.name) {
      util.logErrorAndPath('RHS symbol with `isOptional` creates a unary recursive rule, where the LHS and RHS are the same symbol:\n', util.stylize(this.name), '->', options)
      throw new Error('Ill-formed nonterminal rule')
    }

    // Create shallow clone, to avoid mutating the caller's `options`.
    var optionsClone = util.clone(options)

    // Create new unary `rhs` with only the other RHS symbol. Includes
    // grammatical properties, insertion restrictions, et cetera, specific
    // to that RHS symbol.
    optionsClone.rhs = [ otherRHSSym ]

    // Delete `transpositionCost`, which is specific to binary rules.
    delete optionsClone.transpositionCost

    // Add new unary nonterminal rule.
    this.addRule(optionsClone)
  }

  return this
}
/**
 * Resolves `rhsSym`, an item of a nonterminal rule's `options.rhs`, to its
 * nonterminal symbol name.
 *
 * For use by `NSymbol.prototype._newNonterminalRule()` to map the provided
 * RHS nonterminal symbols to their string names for writing grammar to
 * output.
 *
 * String names of `NSymbol` instances are accepted for use in
 * `splitRegexTerminalSymbols`. For a string, checks that a rule set exists
 * for that name in `NSymbol._ruleSets` and throws an exception if not.
 *
 * Throws an exception if `rhsSym` is none of a string, an `NSymbol`, or an
 * `RHSSymbolWrapper`.
 *
 * @private
 * @static
 * @param {NSymbol|RHSSymbolWrapper|string} rhsSym The RHS nonterminal
 * symbol to map.
 * @returns {string} Returns the name of `rhsSym`.
 */
function nonterminalRHSSymbolToName(rhsSym) {
  switch (rhsSym.constructor) {
    case String:
      // A string name of an `NSymbol`: the symbol must already exist. Only
      // for use by `splitRegexTerminalSymbols`.
      if (!NSymbol._ruleSets.hasOwnProperty(rhsSym)) {
        util.logErrorAndPath('RHS nonterminal symbol does not exist:', util.stylize(rhsSym))
        throw new Error('Unrecognized nonterminal symbol name')
      }
      return rhsSym

    case NSymbol:
      return rhsSym.name

    case Object:
      // `RHSSymbolWrapper`: use the name of the wrapped symbol.
      return rhsSym.symbol.name

    default:
      util.logErrorAndPath('Unrecognized RHS symbol:', rhsSym)
      throw new Error('Unrecognized RHS symbol')
  }
}
/**
 * Resolves `rhsSym`, an item of the `options.rhs` passed to
 * `NSymbol.newBinaryRule(options)`, to a nonterminal symbol name, with a
 * distinguishing suffix if applicable.
 *
 * For use by `NSymbol.newBinaryRule()` to build the `name` of the new
 * `NSymbol` it creates, for which it concatenates the values this function
 * returns for each of the new binary rule's RHS symbols.
 *
 * Binary symbols with identical RHS but differing properties must have
 * distinct names. Hence, for an `RHSSymbolWrapper`, the returned name is
 * suffixed according to `RHSSymbolWrapper.noInsert` and
 * `RHSSymbolWrapper.grammaticalForm`, if defined (in that order).
 *
 * @private
 * @static
 * @param {NSymbol|RHSSymbolWrapper|string} rhsSym The RHS nonterminal
 * symbol to map.
 * @returns {string} Returns the name of `rhsSym`, distinguishing suffix if
 * applicable.
 */
function binaryRHSSymbolToName(rhsSym) {
  /**
   * Every RHS item that is not an `RHSSymbolWrapper` has no suffix:
   * `NSymbol` instances and string names of `NSymbol` instances (for use by
   * `splitRegexTerminalSymbols`). `nonterminalRHSSymbolToName()` throws an
   * exception for an unrecognized type.
   */
  if (rhsSym.constructor !== Object) {
    return nonterminalRHSSymbolToName(rhsSym)
  }

  var suffixedName = rhsSym.symbol.name

  // Mark the name if insertion rules are forbidden for `rhsSym`.
  if (rhsSym.noInsert) {
    suffixedName = grammarUtil.hyphenate(suffixedName, 'no', 'insert')
  }

  // Mark the name with the `grammaticalForm` conjugative value of `rhsSym`,
  // if any.
  var form = rhsSym.grammaticalForm
  if (form) {
    suffixedName = grammarUtil.hyphenate(suffixedName, form)
  }

  return suffixedName
}
/**
 * Builds the `gramProps` object for a new nonterminal rule from the
 * `RHSSymbolWrapper` instances in `rhs`, formatted for use by `pfsearch` and
 * `conjugateText`.
 *
 * The returned `gramProps` object maps each RHS index to the grammatical
 * properties with which to conjugate the term sequence at that RHS index.
 * Every index of `rhs` gets an entry; the entry is an empty object for RHS
 * items that are not `RHSSymbolWrapper` instances or that define no
 * grammatical properties.
 *
 * Consider the following example, which conjugates each term sequence in the
 * rule's binary RHS as specified.
 *   gramProps: {
 *     0: {
 *       // Inflects the display text object produced by `rhs[0]` to its
 *       // infinitive form.
 *       grammaticalForm: 'infinitive',
 *     },
 *     1: {
 *       // Accept the past tense form of `rhs[1]` if input is past tense,
 *       // while defaulting to `grammaticalForm` infinitive form for
 *       // insertions created from this rule.
 *       acceptedTense: 'past',
 *       // If `rhs[1]` is not input in past tense (as `acceptedTense`
 *       // specifies), `grammaticalForm` inflects the display text object
 *       // produced by `rhs[1]` to its infinitive form.
 *       grammaticalForm: 'infinitive',
 *     },
 *   }
 *
 * Note: In the returned object, `RHSSymbolWrapper.grammaticalForm` is stored
 * under the key `form`, with the value 'infinitive' mapped to 'pl'.
 *
 * Conjugation properties are defined for each RHS symbol separately, instead
 * of using the same properties to conjugate text objects produced by either
 * RHS symbol (as previously implemented), to enable different conjugation for
 * two separate verbs in a binary RHS.
 * • For example, in the following rule, `pfsearch` would conjugate `[have]`
 *   to its infinitive form and `[verb-created]` to its past tense form:
 *     "(people who) `[have]` `[verb-created]` (...)"
 *
 * Multiple conjugation properties are stored in a single object for each
 * `rhs` index. This allows `pfsearch` to atomically check if a `ruleProps`
 * instance contains any grammatical conjugation properties at all, instead of
 * separate checks for each possible conjugation property, before attempting
 * to conjugate the child node's display text.
 *
 * For each grammatical property on each `RHSSymbolWrapper`, checks that
 * `symbol` produces conjugative `text` for that property. If not, throws an
 * exception.
 * • The `RHSSymbolWrapper` grammatical properties can only conjugate `text`
 *   values `symbol` immediately produces (after term sequence flattening),
 *   and not further.
 *
 * At the end of grammar generation, `removeTempRulesAndProps` removes
 * pairings within `gramProps` instances that map a RHS index to an empty
 * object, and removes entire `gramProps` instances from rules if each RHS
 * index maps to an empty object.
 *
 * Invoke this function after invoking `flattenNonterminalRHS(rhs)`.
 *
 * @private
 * @static
 * @param {(NSymbol|RHSSymbolWrapper|string)[]} rhs The nonterminal RHS array
 * to iterate over.
 * @returns {Object} Returns the new `gramProps` object for assignment to a
 * new nonterminal rule.
 */
function getGramProps(rhs) {
  // Throws an exception if `wrapper.symbol` produces no `text` that
  // `gramPropValue` can conjugate. `wrapperPropName` is the name of the
  // `RHSSymbolWrapper` property from which `gramPropValue` derives.
  var throwIfFutile = function (wrapper, wrapperPropName, gramPropValue) {
    if (isFutileGramProp(wrapper.symbol.name, gramPropValue)) {
      printFutileGramPropError(wrapper, wrapperPropName)
      throw new Error('Ill-formed nonterminal grammatical property')
    }
  }

  var gramProps = {}
  var symbolCount = rhs.length

  for (var idx = 0; idx < symbolCount; ++idx) {
    // The grammatical properties with which to conjugate the term sequence
    // at `rhs[idx]`.
    var conjugation = {}
    gramProps[idx] = conjugation

    // Only `RHSSymbolWrapper` instances carry grammatical properties.
    var wrapper = rhs[idx]
    if (wrapper.constructor !== Object) {
      continue
    }

    var tense = wrapper.acceptedTense
    if (tense) {
      conjugation.acceptedTense = tense
      throwIfFutile(wrapper, 'acceptedTense', tense)
    }

    var form = wrapper.grammaticalForm
    if (form) {
      // Map 'infinitive' -> 'pl' to use the `pl` property on text objects
      // created by `g.newVerb()` or `g.newTenseVerb()`. The futility check
      // uses the mapped value.
      conjugation.form = form === 'infinitive' ? 'pl' : form
      throwIfFutile(wrapper, 'grammaticalForm', conjugation.form)
    }
  }

  // If `gramProps` lacks any properties, `removeTempRulesAndProps` removes
  // the empty object at the conclusion of grammar generation.
  return gramProps
}
/**
* Prints an error message for when `rhsSymbolWrapper.symbol` produces no
* rules with a conjugative `text` object with a value for the grammatical
* property, `gramProp`.
*
* For use by `getGramProps()`.
*
* @private
* @static
* @param {RHSSymbolWrapper} rhsSymbolWrapper The RHS symbol wrapper with
* the futile `symbol`.
* @param {string} gramProp The grammatical property name, defined on
* `RHSSymbolWrapper`.
*/
function printFutileGramPropError(rhsSymbolWrapper, gramProp) {
	// Stylize each message fragment separately, in the order the fragments
	// appear in the printed message.
	var styledSymbolName = util.stylize(rhsSymbolWrapper.symbol.name)
	var styledPropName = util.stylize(gramProp)
	var styledPropValue = util.stylize(rhsSymbolWrapper[gramProp])

	// The wrapper itself is passed last so it is printed alongside the message.
	util.logErrorAndPath(
		'The RHS symbol,',
		styledSymbolName + ', produces no rules with conjugative `text` applicable to the grammatical property',
		styledPropName,
		'->',
		styledPropValue + ':',
		rhsSymbolWrapper
	)
}
/**
* Checks if `rhsSym` produces no rules with a conjugative `text` object
* with a value for the grammatical form property, `gramProp`.
*
* Grammatical form properties, defined on the nonterminal rule `gramProps`
* object for a specific RHS symbol, can only conjugate `text` objects on
* term sequences that the RHS symbol (at the property's associated index)
* immediately produces after term sequence flattening (i.e., its child
* nodes).
* • The `grammaticalForm` value 'infinitive' is mapped to 'pl' when
* creating the `gramProps` object, before invoking this function, because
* it uses the `pl` property on text objects that `g.newVerb()` and
* `g.newTenseVerb()` create.
*
* @private
* @static
* @param {string} rhsSym The name of the RHS nonterminal symbol to check for
* conjugative `text` objects with a value for `gramProp`.
* @param {string} gramProp The `gramProps` value to check for whether it can
* conjugate `text` that `rhsSym` produces; e.g., 'past', 'obj'.
* @returns {boolean} Returns `true` if `rhsSym` produces no conjugative
* `text` applicable to `gramProp`, else `false`.
*/
function isFutileGramProp(rhsSym, gramProp) {
	if (isUnrecognizedGramProp(gramProp)) {
		throw new Error('Unrecognized `gramProps` property')
	}

	// Checks if `gramProp` can conjugate a single `text` item, i.e., the item
	// has a truthy value for the `gramProp` property.
	var canConjugate = function (textItem) {
		return Boolean(textItem[gramProp])
	}

	// Checks if `rule` yields at least one `text` item `gramProp` can conjugate.
	var producesConjugableText = function (rule) {
		var text = rule.text
		if (text) {
			// An array `text` belongs to a nonterminal multi-token substitution:
			// one conjugable item in the array suffices. Otherwise `text` is a
			// single item. A rule with `text` is judged on that `text` alone.
			return text.constructor === Array
				? text.some(function (textItem) { return canConjugate(textItem) })
				: canConjugate(text)
		}

		if (!rule.isTermSequence) {
			return false
		}

		/**
		 * `rule` is a non-substitution and non-edit term sequence, for which
		 * `flattenTermSequence` will merge the `text` values `rule.rhs`
		 * produces and assign it to `rule.text` (within reach of `gramProp`)
		 * for `pfsearch` to conjugate. For example:
		 *   `[github-create]` -> `[create]` -> "create", text: `{create-text-forms}`
		 *
		 * Hence, recurse into each RHS symbol of the term sequence.
		 */
		return rule.rhs.some(function (subSym) {
			return !isFutileGramProp(subSym, gramProp)
		})
	}

	// `gramProp` is futile only if none of the rules `rhsSym` produces yields
	// conjugable `text`.
	return !NSymbol._ruleSets[rhsSym].some(producesConjugableText)
}
/**
* Checks if `gramProp` is an unrecognized grammatical form property. If so,
* prints an error.
*
* For use by `isFutileGramProp()` to check its provided value.
*
* @private
* @static
* @param {string} gramProp The `gramProps` value from a new nonterminal
* rule.
* @returns {boolean} Returns `true` if `gramProp` is ill-formed, else
* `false`.
*/
function isUnrecognizedGramProp(gramProp) {
	// 'infinitive' is only valid when defining nonterminal rules. It must have
	// been mapped to 'pl' for the output grammar (which uses the `pl` value on
	// `text` objects) before reaching this check.
	if (gramProp === 'infinitive') {
		util.logErrorAndPath(
			'Grammatical form property',
			util.stylize('infinitive'),
			'should have been mapped to',
			util.stylize('pl') + ':',
			util.stylize(gramProp)
		)
		return true
	}

	// 'pl' is the mapped form of 'infinitive' and is always recognized.
	if (gramProp === 'pl') {
		return false
	}

	// Any other value must be among the schema's `grammaticalForm` values.
	var definableForms = rhsSymbolWrapperSchema.grammaticalForm.values
	if (definableForms.indexOf(gramProp) !== -1) {
		return false
	}

	// Report the bad value with the acceptable ones. 'infinitive' is omitted
	// from that list because it is rejected above.
	var styledGramProp = util.stylize(gramProp)
	var acceptableForms = util.without(definableForms, 'infinitive')
		.map(util.unary(util.stylize))
		.join(', ')
	util.logErrorAndPath('Unrecognized `gramProps` property:', styledGramProp, '\n Acceptable `gramProps` properties:', acceptableForms)
	return true
}
/**
* Generates an array of `rhs` indexes, from `RHSSymbolWrapper` instances in
* `rhs` with `RHSSymbolWrapper.noInsert`, for which to instruct
* `createInsertionRules` to prevent the creation of insertion rules at that
* index.
*
* Invoke this function after invoking `flattenNonterminalRHS(rhs)`.
*
* @private
* @static
* @param {(NSymbol|RHSSymbolWrapper|string)[]} rhs The nonterminal RHS
* array to iterate over.
* @returns {number[]|undefined} Returns the array of RHS indexes for which
* to forbid insertions, if any, else `undefined`.
*/
function getRHSNoInsertionIndexes(rhs) {
	var forbiddenIndexes = []

	for (var [ index, sym ] of rhs.entries()) {
		// Only plain-object wrappers can carry `noInsert`; other RHS items
		// never forbid insertions.
		var isWrapper = sym.constructor === Object
		if (isWrapper && sym.noInsert) {
			forbiddenIndexes.push(index)
		}
	}

	// Yield `undefined`, not an empty array, when no index forbids insertions.
	return forbiddenIndexes.length > 0 ? forbiddenIndexes : undefined
}
/**
* Generates a map of `rhs` indexes, from `RHSSymbolWrapper` instances in
* `rhs` with `RHSSymbolWrapper.noText`, for which to instruct
* `flattenTermSequence` to discard `text` the term sequence at that `rhs`
* index produces when creating a new `ruleProps` for the associated
* nonterminal rule.
*
* Invoke this function after invoking `flattenNonterminalRHS(rhs)`.
*
* @private
* @static
* @param {(NSymbol|RHSSymbolWrapper|string)[]} rhs The nonterminal RHS
* array to iterate over.
* @returns {Object.<number, boolean>|undefined} Returns the map of RHS
* indexes for which to prevent display text, if any, else `undefined`.
*/
function getRHSNoTextIndexes(rhs) {
	var noTextIndexes = {}
	var hasNoTextIndex = false

	for (var index = 0; index < rhs.length; ++index) {
		var wrapper = rhs[index]

		// Skip RHS items that are not plain-object wrappers, and wrappers that
		// do not enable `noText`. Only truthy `noText` values are mapped.
		if (wrapper.constructor !== Object || !wrapper.noText) {
			continue
		}

		// `noText` requires the wrapped symbol to be a term sequence.
		if (!wrapper.symbol.isTermSequence) {
			util.logErrorAndPath('`RHSSymbolWrapper.noText` is `true` when `RHSSymbolWrapper.symbol` is not a term sequence:', wrapper)
			throw new Error('Ill-formed RHSSymbolWrapper')
		}

		// `noText` requires `noInsert` to be enabled on the same wrapper.
		if (!wrapper.noInsert) {
			util.logErrorAndPath('`RHSSymbolWrapper.noText` is `true` without `RHSSymbolWrapper.noInsert` also `true`:', wrapper)
			throw new Error('Ill-formed RHSSymbolWrapper')
		}

		noTextIndexes[index] = true
		hasNoTextIndex = true
	}

	// Yield `undefined`, not an empty map, when no index discards its text.
	return hasNoTextIndex ? noTextIndexes : undefined
}
/**
* Assigns the semantic and cost properties to `newRule` using
* `ruleOptions`. For use by `NSymbol.prototype._newNonterminalRule()` and
* `NSymbol.prototype._newTerminalRule()`.
*
* Assigns the following semantic and cost properties to `newRule`:
* • {Object} [newRule.semantic] - The semantic used in semantic trees that
* correspond to parse trees that contain this rule.
* • {boolean} [newRule.semanticIsReduced] - Specify if `newRule.semantic`
* is reduced, else semantic is to accept other semantics as arguments.
* • {number} newRule.cost - The rule cost, which includes the cost of
* `newRule.semantic` and `ruleOptions.costPenalty`, if any.
*
* In `NSymbol.prototype._newNonterminalRule()`, invoke this function after
* invoking `assignTermSequence()`, which assigns term sequence properties
* to `newRule` which this function checks.
*
* Note: This function mutates `newRule`.
*
* @private
* @static
* @param {Object} newRule The new rule, terminal or nonterminal, which to
* assign the semantic and cost properties.
* @param {Object} ruleOptions The rule options object passed to
* `NSymbol.prototype._newNonterminalRule()` or
* `NSymbol.prototype._newTerminalRule()`.
* @returns {Object} Returns `newRule`.
*/
function assignSemanticAndCost(newRule, ruleOptions) {
	// The cost penalty added to this rule's cost (0 when none is provided).
	var costPenalty = ruleOptions.costPenalty || 0

	if (ruleOptions.semantic) {
		// Nonterminal anaphoric rules may not define a semantic.
		if (!ruleOptions.isTerminal && ruleOptions.anaphoraPersonNumber) {
			util.logErrorAndPath('Anaphoric rule has semantic:', ruleOptions)
			throw new Error('Ill-formed nonterminal rule')
		}

		// Assign the semantic used in semantic trees that correspond to parse
		// trees that contain this rule. Note: `sort()` sorts in place, so
		// `ruleOptions.semantic` and `newRule.semantic` are the same, sorted
		// array.
		newRule.semantic = ruleOptions.semantic.sort(semantic.compare)

		/**
		 * Specify if `newRule.semantic` is reduced, else semantic is to accept
		 * other semantics as arguments.
		 *
		 * For nonterminal rules, the rule's semantic may be reduced and have
		 * `newRule.rhs` produce additional semantics, reduced or not. If so,
		 * `pfsearch` will merge this rule's reduced semantic
		 * (`newRule.semantic`) with the semantic(s) `newRule.rhs` produces,
		 * forming a semantic array of RHS semantic arguments.
		 */
		newRule.semanticIsReduced = semantic.isReduced(ruleOptions.semantic)

		/**
		 * Use `costPenalty` and the rule's semantic cost as the rule's cost,
		 * which `NSymbol.diversifyRuleCosts()` later tweaks to ensure cost
		 * variation. `pfsearch` uses the cumulative cost of parse trees to rank
		 * the k-best trees.
		 */
		newRule.cost = semantic.sumCosts(ruleOptions.semantic) + costPenalty
	} else {
		/**
		 * Use `costPenalty`, if any (else, cost of 0), as rule cost, which
		 * `NSymbol.diversifyRuleCosts()` later tweaks to ensure cost variation.
		 *
		 * `NSymbol.diversifyRuleCosts()` occurs after grammar generation to
		 * ignore removed, unused rules when assigning the rule epsilon cost
		 * value.
		 */
		newRule.cost = costPenalty
	}

	// Return the mutated rule, as this function's `@returns` documentation
	// specifies. Previously the function implicitly returned `undefined`.
	return newRule
}
/**
* Assigns term sequence properties to `newNonterminalRule` using
* `ruleOptions`. For use by `NSymbol.prototype._newNonterminalRule()`.
*
* Assigns the following term sequence properties to `newNonterminalRule`:
* • {boolean} [newNonterminalRule.isTermSequence] - Defines `true` if the
* `newNonterminalRule.rhs` produces only term sequences, which instructs
* `flattenTermSequence` to merge the `text` values produced by
* `newNonterminalRule.rhs`, assign the merged `text` to this rule's parse
* node, and prevent `pfsearch` from traversing past this rule's node.
* • {Object|string|(Object|string)[]} [newNonterminalRule.text] - If
* `newNonterminalRule.isTermSequence` is `true`, assigns `options.text` to
* `newNonterminalRule.text` for nonterminal substitutions, which instructs
* `flattenTermSequence` to use this rule's display text instead of text
* `newNonterminalRule.rhs` produces for the terminal node it creates for
* this rule.
* • {number[]} newNonterminalRule.rhsTermSequenceIndexes - If
* `newNonterminalRule.isTermSequence` is not truthy because not every RHS
* symbol is a term sequence, `newNonterminalRule.rhsTermSequenceIndexes`
* specifies the `newNonterminalRule.rhs` indexes of which RHS symbol is a
* term sequence, if either. Only possible if `newNonterminalRule.rhs` is
* binary. For use by `flattenTermSequence` to flatten a binary nonterminal
* node with one term sequence into an insertion node with the term
* sequence's text as the insertion text.
*
* Note: This function mutates `newNonterminalRule`.
*
* @private
* @static
* @param {Object} newNonterminalRule The new nonterminal rule which to
* assign term sequence properties.
* @param {Object} ruleOptions The rule options object passed to
* `NSymbol.prototype._newNonterminalRule()`.
* @returns {Object} Returns `newNonterminalRule`.
*/
function assignTermSequence(newNonterminalRule, ruleOptions) {
// Check if `ruleOptions.rhs` contains only terminal rule sets (e.g.,
// `g.newVerb()`, `g.newPronoun()`) or terminal rule sequences (e.g.,
// `g.newTermSequence()`).
var rhs = ruleOptions.rhs
if (rhs.every(rhsSym => (rhsSym.constructor === Object ? rhsSym.symbol : rhsSym).isTermSequence)) {
/**
* Forbid a rule's RHS from containing multiple verbs, each of which
* produces a conjugative `text` object that the rule can not conjugate.
* • If a RHS symbol is a conjugative verb, checks if that symbol is an
* instance of `RHSSymbolWrapper` with a `grammaticalForm` value that
* can conjugate the `text` object the verb produces.
*
* Prevents `flattenTermSequence` from merging two conjugative verb
* `text` objects, each of which can have an input `tense`, and passing
* the merged text array up to a parent node without knowing to which
* text object each input `tense` applies.
*
* It is okay if `newNonterminalRule` can only conjugate one verb and
* not the other, because it still prevents two unused instances of
* `tense` with two verb conjugative `text` objects.