forked from marko-js/htmljs-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBaseParser.js
193 lines (154 loc) · 5.03 KB
/
BaseParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
'use strict';
// UTF-16 code unit of the line feed character ("\n").
const CODE_NEWLINE = 10;
// UTF-16 code unit of the carriage return character ("\r").
const CODE_CARRIAGE_RETURN = 13;
/**
 * Character-by-character state machine driver over an in-memory string.
 *
 * A "state" is a plain object. The driver looks for these members on it:
 *   - `name`  : used in the error message raised by `enterState`
 *   - `enter` : optional, called with the previous state when entered
 *   - `leave` : optional, called with the next state when left
 *   - `eol`   : optional, called with "\n" or "\r\n" on a line break
 *   - `char`  : required, called with (character, charCode) otherwise
 *   - `eof`   : optional, called once the input has been consumed
 * Every handler is invoked with `this` bound to the parser instance.
 */
class Parser {
    /**
     * Hands back the object it receives without modification.
     *
     * @param {Object} mixins
     * @returns {Object} the very same `mixins` object
     */
    static createState(mixins) {
        return mixins;
    }

    /**
     * @param {*} [options] accepted but not read by this class
     */
    constructor(options) {
        this.reset();
    }

    /**
     * Puts all per-parse fields back to their "nothing parsed yet" values.
     * `initialState` is not touched.
     */
    reset() {
        Object.assign(this, {
            // absolute index of the character currently being looked at
            pos: -1,
            // index of the last readable character of the input
            maxPos: -1,
            // active state object (none yet)
            state: null,
            // input exactly as handed to parse() (joined if it was an array)
            src: null,
            // input that is actually scanned (leading BOM removed)
            data: null,
            filename: null
        });
    }

    /**
     * Remembers the state that `parse` enters before scanning.
     *
     * @param {Object} initialState
     */
    setInitialState(initialState) {
        this.initialState = initialState;
    }

    /**
     * Switches to another state: the current state's `leave` hook (if any)
     * runs first, then `this.state` is replaced, then the new state's
     * `enter` hook (if any) runs.
     *
     * @param {Object} state the state to switch to
     * @throws {Error} when `state` is already the current state
     */
    enterState(state) {
        const previous = this.state;

        // Switching to the state we are already in is almost certainly a
        // logic error in a state handler, so fail loudly.
        if (previous === state) {
            throw new Error('Re-entering the current state is illegal - ' + state.name);
        }

        if (previous && previous.leave) {
            previous.leave.call(this, state);
        }

        this.state = state;

        if (state.enter) {
            state.enter.call(this, previous);
        }
    }

    /**
     * Tests whether `str` occurs in the input starting at `startPos`.
     *
     * @param {string} str text to look for
     * @param {number} [startPos] index to compare from; when null or
     *        undefined the character after the current position is used
     * @returns {string|undefined} `str` on a match; `undefined` on a
     *          mismatch or when the candidate would run past `maxPos`
     */
    lookAheadFor(str, startPos) {
        const from = (startPos == null) ? this.pos + 1 : startPos;
        const to = from + str.length;

        // Not enough input left to hold the whole candidate.
        if (to > this.maxPos + 1) {
            return undefined;
        }

        if (this.data.substring(from, to) !== str) {
            return undefined;
        }
        return str;
    }

    /**
     * Reads the character `offset` places away from `startPos`.
     *
     * @param {number} offset distance from the base position
     * @param {number} [startPos] base position; defaults to the current
     *        position when null or undefined
     * @returns {string} result of `charAt` on the scanned input
     */
    lookAtCharAhead(offset, startPos) {
        const base = (startPos == null) ? this.pos : startPos;
        return this.data.charAt(base + offset);
    }

    /**
     * Same as `lookAtCharAhead`, but yields the character code.
     *
     * @param {number} offset distance from the base position
     * @param {number} [startPos] base position; defaults to the current
     *        position when null or undefined
     * @returns {number} result of `charCodeAt` on the scanned input
     */
    lookAtCharCodeAhead(offset, startPos) {
        const base = (startPos == null) ? this.pos : startPos;
        return this.data.charCodeAt(base + offset);
    }

    /**
     * Moves the current position backwards by `offset`.
     *
     * @param {number} offset
     */
    rewind(offset) {
        this.pos = this.pos - offset;
    }

    /**
     * Moves the current position forwards by `offset`.
     *
     * @param {number} offset
     */
    skip(offset) {
        this.pos = this.pos + offset;
    }

    /**
     * Jumps just past the last readable character, which makes the scan
     * loop in `parse` stop.
     */
    end() {
        this.pos = this.maxPos + 1;
    }

    /**
     * Extracts a slice of the scanned input.
     *
     * @param {number} pos start index
     * @param {number} [endPos] end index (exclusive)
     * @returns {string}
     */
    substring(pos, endPos) {
        return this.data.substring(pos, endPos);
    }

    /**
     * Scans `data` from start to finish, dispatching to the current state.
     *
     * Line breaks ("\n" and "\r\n") go to the state's optional `eol`
     * handler; every other character, including a "\r" that is not
     * followed by "\n", goes to the state's `char` handler. When the scan
     * is over, the optional `eof` handler of whatever state is current
     * at that point is called.
     *
     * Does nothing when `data` is null or undefined. When no state is
     * active after the initial-state step, the fields are populated but
     * nothing is scanned.
     *
     * @param {string|string[]} data input text; an array is joined with ''
     * @param {string} [filename] stored on the instance as `filename`
     */
    parse(data, filename) {
        if (data == null) {
            return;
        }

        // Each parse starts from a clean slate.
        this.reset();

        const original = Array.isArray(data) ? data.join('') : data;

        // Keep the untouched text around (used for reporting warnings).
        this.src = original;
        this.filename = filename;

        // Drop a leading byte order mark (U+FEFF) from the text we scan:
        // https://en.wikipedia.org/wiki/Byte_order_mark
        const text = (original.charCodeAt(0) === 0xFEFF) ? original.slice(1) : original;

        this.data = text;
        this.maxPos = text.length - 1;

        if (this.initialState) {
            this.enterState(this.initialState);
        }

        // Start at the first character.
        this.pos = 0;

        // Without an active state there is nothing to dispatch to.
        if (!this.state) {
            return;
        }

        // Handlers may move this.pos, so it is re-read on every iteration.
        for (let pos = this.pos; pos <= this.maxPos; pos = this.pos) {
            const ch = text[pos];
            const code = ch.charCodeAt(0);
            const current = this.state;

            switch (code) {
                case CODE_NEWLINE:
                    if (current.eol) {
                        current.eol.call(this, ch);
                    }
                    this.pos++;
                    continue;

                case CODE_CARRIAGE_RETURN: {
                    const following = pos + 1;
                    if (following < text.length && text.charCodeAt(following) === CODE_NEWLINE) {
                        if (current.eol) {
                            current.eol.call(this, '\r\n');
                        }
                        // consume both the "\r" and the "\n"
                        this.pos += 2;
                        continue;
                    }
                    // A lone "\r" is handled like any other character.
                    break;
                }
            }

            // Every state is expected to provide a "char" handler.
            current.char.call(this, ch, code);
            this.pos++;
        }

        const last = this.state;
        if (last && last.eof) {
            last.eof.call(this);
        }
    }
}
// The Parser class is the sole export of this module.
module.exports = Parser;