Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
80536 views
1
/*global Buffer*/
2
// Table of named constants; every entry has a unique integer value.
// toknam() searches this table to turn a code back into its name.
var C = {
  // Tokens
  LEFT_BRACE: 0x1,
  RIGHT_BRACE: 0x2,
  LEFT_BRACKET: 0x3,
  RIGHT_BRACKET: 0x4,
  COLON: 0x5,
  COMMA: 0x6,
  TRUE: 0x7,
  FALSE: 0x8,
  NULL: 0x9,
  STRING: 0xa,
  NUMBER: 0xb,
  // Tokenizer States
  START: 0x11,
  TRUE1: 0x21,
  TRUE2: 0x22,
  TRUE3: 0x23,
  FALSE1: 0x31,
  FALSE2: 0x32,
  FALSE3: 0x33,
  FALSE4: 0x34,
  NULL1: 0x41,
  NULL2: 0x42,
  NULL3: 0x43,
  NUMBER1: 0x51,
  NUMBER2: 0x52,
  NUMBER3: 0x53,
  NUMBER4: 0x54,
  NUMBER5: 0x55,
  NUMBER6: 0x56,
  NUMBER7: 0x57,
  NUMBER8: 0x58,
  STRING1: 0x61,
  STRING2: 0x62,
  STRING3: 0x63,
  STRING4: 0x64,
  STRING5: 0x65,
  STRING6: 0x66,
  // Parser States
  VALUE: 0x71,
  KEY: 0x72,
  // Parser Modes
  OBJECT: 0x81,
  ARRAY: 0x82
};

// Local aliases so the tokenizer and parser can compare against bare names.
// Tokens
var LEFT_BRACE = C.LEFT_BRACE;
var RIGHT_BRACE = C.RIGHT_BRACE;
var LEFT_BRACKET = C.LEFT_BRACKET;
var RIGHT_BRACKET = C.RIGHT_BRACKET;
var COLON = C.COLON;
var COMMA = C.COMMA;
var TRUE = C.TRUE;
var FALSE = C.FALSE;
var NULL = C.NULL;
var STRING = C.STRING;
var NUMBER = C.NUMBER;
// Tokenizer States
var START = C.START;
var TRUE1 = C.TRUE1;
var TRUE2 = C.TRUE2;
var TRUE3 = C.TRUE3;
var FALSE1 = C.FALSE1;
var FALSE2 = C.FALSE2;
var FALSE3 = C.FALSE3;
var FALSE4 = C.FALSE4;
var NULL1 = C.NULL1;
var NULL2 = C.NULL2;
var NULL3 = C.NULL3;
var NUMBER1 = C.NUMBER1;
var NUMBER2 = C.NUMBER2;
var NUMBER3 = C.NUMBER3;
var NUMBER4 = C.NUMBER4;
var NUMBER5 = C.NUMBER5;
var NUMBER6 = C.NUMBER6;
var NUMBER7 = C.NUMBER7;
var NUMBER8 = C.NUMBER8;
var STRING1 = C.STRING1;
var STRING2 = C.STRING2;
var STRING3 = C.STRING3;
var STRING4 = C.STRING4;
var STRING5 = C.STRING5;
var STRING6 = C.STRING6;
// Parser States
var VALUE = C.VALUE;
var KEY = C.KEY;
// Parser Modes
var OBJECT = C.OBJECT;
var ARRAY = C.ARRAY;
48
49
// Slow code to string converter (only used when throwing syntax errors)
50
/**
 * Slow reverse lookup from a constant's numeric code to its name in `C`.
 * Only used when building syntax-error messages.
 *
 * @param {number} code - Value to look up.
 * @returns {*} The first key of `C` whose value is strictly equal to `code`;
 *   otherwise `code` itself when it is falsy, or "0x" followed by
 *   `code.toString(16)`.
 */
function toknam(code) {
  var names = Object.keys(C);
  var idx = 0;
  while (idx < names.length) {
    var candidate = names[idx];
    if (C[candidate] === code) {
      return candidate;
    }
    idx += 1;
  }
  // Unknown code: falsy values are handed back untouched, anything else is
  // rendered as a hex literal.
  if (!code) {
    return code;
  }
  return "0x" + code.toString(16);
}
58
59
60
/**
 * Streaming JSON parser state.
 *
 * A Parser holds two pieces of state: the tokenizer state (`tState` and the
 * string/number scratch fields) used by `write`, and the parser state
 * (`state`, `mode`, `key`, `value`, `stack`) used by `onToken`.
 *
 * @constructor
 */
function Parser() {
  this.tState = START;
  this.value = undefined;

  this.string = undefined; // string data
  this.unicode = undefined; // unicode escapes

  // For number parsing
  this.negative = undefined;
  this.magnatude = undefined; // (sic) spelling kept: the tokenizer reads and writes this exact property name
  this.position = undefined;
  this.exponent = undefined;
  this.negativeExponent = undefined;
  this.numberLength = 0;

  this.key = undefined;
  this.mode = undefined;
  this.stack = [];
  this.state = VALUE;
  this.bytes_remaining = 0; // number of bytes remaining in multi byte utf8 char to read after split boundary
  this.bytes_in_sequence = 0; // bytes in multi byte utf8 char to read
  // Scratch buffers, keyed by sequence length, for rebuilding chars split
  // across chunk boundaries. Buffer.alloc replaces the deprecated
  // `new Buffer(size)` constructor and always returns zero-filled memory.
  this.temp_buffs = { "2": Buffer.alloc(2), "3": Buffer.alloc(3), "4": Buffer.alloc(4) };

  // Stream offset
  this.offset = -1;
}
var proto = Parser.prototype;
87
/**
 * Reports an unexpected byte met by the tokenizer through `onError`.
 *
 * @param {Buffer} buffer - Chunk currently being tokenized.
 * @param {number} i - Index into `buffer` of the offending byte; this index
 *   is what the message reports as the position.
 */
proto.charError = function (buffer, i) {
  var offending = JSON.stringify(String.fromCharCode(buffer[i]));
  var stateName = toknam(this.tState);
  var message = "Unexpected " + offending + " at position " + i + " in state " + stateName;
  this.onError(new Error(message));
};
90
/**
 * Default error hook: rethrows whatever it is given.
 *
 * @param {Error} err - Error built by `charError`, `parseError` or `write`.
 * @throws {Error} Always throws `err`.
 */
proto.onError = function (err) {
  throw err;
};
91
/**
 * Feeds one chunk of JSON text to the tokenizer.
 *
 * The chunk is scanned byte by byte. Tokenizer state (`tState` plus the
 * string/number scratch fields) lives on the instance, so a token may start
 * in one call and finish in a later one. Each completed token is passed to
 * `this.onToken(token, value)`.
 *
 * A number is only emitted once a byte that cannot continue it has been
 * seen; that byte is then re-scanned in the START state.
 *
 * @param {Buffer|string} buffer - Chunk to tokenize. Strings are converted
 *   to a Buffer first.
 * @returns {undefined}
 * @throws {Error} Whatever `onError` throws when an unexpected byte or an
 *   invalid UTF-8 lead byte is found (the default `onError` rethrows).
 */
proto.write = function (buffer) {
  // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
  if (typeof buffer === "string") { buffer = Buffer.from(buffer); }
  var n;
  for (var i = 0, l = buffer.length; i < l; i++) {
    if (this.tState === START) {
      n = buffer[i];
      this.offset++;
      if (n === 0x7b) { this.onToken(LEFT_BRACE, "{"); // {
      } else if (n === 0x7d) { this.onToken(RIGHT_BRACE, "}"); // }
      } else if (n === 0x5b) { this.onToken(LEFT_BRACKET, "["); // [
      } else if (n === 0x5d) { this.onToken(RIGHT_BRACKET, "]"); // ]
      } else if (n === 0x3a) { this.onToken(COLON, ":"); // :
      } else if (n === 0x2c) { this.onToken(COMMA, ","); // ,
      } else if (n === 0x74) { this.tState = TRUE1; // t
      } else if (n === 0x66) { this.tState = FALSE1; // f
      } else if (n === 0x6e) { this.tState = NULL1; // n
      } else if (n === 0x22) { this.string = ""; this.tState = STRING1; // "
      } else if (n === 0x2d) { this.negative = true; this.tState = NUMBER1; // -
      } else if (n === 0x30) { this.magnatude = 0; this.tState = NUMBER2; // 0
      } else if (n > 0x30 && n <= 0x39) { // 1-9 (0x3a-0x3f are punctuation, not digits)
        this.magnatude = n - 0x30; this.tState = NUMBER3;
      } else if (n === 0x20 || n === 0x09 || n === 0x0a || n === 0x0d) {
        // whitespace
      } else {
        this.charError(buffer, i);
      }
    } else if (this.tState === STRING1) { // After open quote
      n = buffer[i]; // get current byte from buffer
      if (this.bytes_remaining > 0) {
        // A multi byte char was split by the previous chunk boundary: keep
        // filling its scratch buffer. Only as many bytes as this chunk
        // actually holds are taken, so a char may span more than two chunks.
        var pending = this.temp_buffs[this.bytes_in_sequence];
        var filled = this.bytes_in_sequence - this.bytes_remaining;
        var take = Math.min(this.bytes_remaining, l - i);
        for (var j = 0; j < take; j++) {
          pending[filled + j] = buffer[i + j];
        }
        this.bytes_remaining -= take;
        if (this.bytes_remaining === 0) {
          this.string += pending.toString();
          this.bytes_in_sequence = 0;
        }
        i = i + take - 1;
      } else if (this.bytes_remaining === 0 && n >= 128) { // no carry over: parse multi byte (>=128) chars one at a time
        // 128-193 cannot start a sequence and 245-255 never occur in UTF-8.
        // Rejecting the latter also keeps bytes_in_sequence from staying 0,
        // which would step `i` backwards and loop forever.
        if (n <= 193 || n > 244) {
          this.onError(new Error("Invalid UTF-8 character at position " + i + " in state " + toknam(this.tState)));
          return;
        }
        if ((n >= 194) && (n <= 223)) { this.bytes_in_sequence = 2; }
        if ((n >= 224) && (n <= 239)) { this.bytes_in_sequence = 3; }
        if ((n >= 240) && (n <= 244)) { this.bytes_in_sequence = 4; }
        if ((this.bytes_in_sequence + i) > buffer.length) { // bytes needed to complete the char fall outside this chunk: boundary split
          for (var k = 0; k <= (buffer.length - 1 - i); k++) {
            this.temp_buffs[this.bytes_in_sequence][k] = buffer[i + k]; // stash the bytes available in this chunk
          }
          this.bytes_remaining = (i + this.bytes_in_sequence) - buffer.length;
          i = buffer.length - 1;
        } else {
          this.string += buffer.slice(i, (i + this.bytes_in_sequence)).toString();
          i = i + this.bytes_in_sequence - 1;
        }
      } else if (n === 0x22) { // closing quote
        this.tState = START;
        this.onToken(STRING, this.string);
        this.offset += Buffer.byteLength(this.string, 'utf8') + 1;
        this.string = undefined;
      } else if (n === 0x5c) {
        this.tState = STRING2;
      } else if (n >= 0x20) {
        this.string += String.fromCharCode(n);
      } else {
        this.charError(buffer, i);
      }
    } else if (this.tState === STRING2) { // After backslash
      n = buffer[i];
      if (n === 0x22) { this.string += "\""; this.tState = STRING1;
      } else if (n === 0x5c) { this.string += "\\"; this.tState = STRING1;
      } else if (n === 0x2f) { this.string += "\/"; this.tState = STRING1;
      } else if (n === 0x62) { this.string += "\b"; this.tState = STRING1;
      } else if (n === 0x66) { this.string += "\f"; this.tState = STRING1;
      } else if (n === 0x6e) { this.string += "\n"; this.tState = STRING1;
      } else if (n === 0x72) { this.string += "\r"; this.tState = STRING1;
      } else if (n === 0x74) { this.string += "\t"; this.tState = STRING1;
      } else if (n === 0x75) { this.unicode = ""; this.tState = STRING3;
      } else {
        this.charError(buffer, i);
      }
    } else if (this.tState === STRING3 || this.tState === STRING4 || this.tState === STRING5 || this.tState === STRING6) { // unicode hex codes
      n = buffer[i];
      // 0-9 A-F a-f
      if ((n >= 0x30 && n <= 0x39) || (n >= 0x41 && n <= 0x46) || (n >= 0x61 && n <= 0x66)) {
        this.unicode += String.fromCharCode(n);
        // STRING3..STRING6 are consecutive, so incrementing moves on to the
        // next hex digit; after the fourth the code unit is complete.
        if (this.tState++ === STRING6) {
          this.string += String.fromCharCode(parseInt(this.unicode, 16));
          this.unicode = undefined;
          this.tState = STRING1;
        }
      } else {
        this.charError(buffer, i);
      }
    } else if (this.tState === NUMBER1) { // after minus
      n = buffer[i];
      this.numberLength++;
      if (n === 0x30) { this.magnatude = 0; this.tState = NUMBER2; }
      else if (n > 0x30 && n <= 0x39) { this.magnatude = n - 0x30; this.tState = NUMBER3; }
      else { this.charError(buffer, i); }
    } else if (this.tState === NUMBER2) { // * After initial zero
      n = buffer[i];
      this.numberLength++;
      if (n === 0x2e) { // .
        this.position = 0.1; this.tState = NUMBER4;
      } else if (n === 0x65 || n === 0x45) { // e/E
        this.exponent = 0; this.tState = NUMBER6;
      } else {
        this.tState = START;
        this.onToken(NUMBER, 0);
        this.offset += this.numberLength - 1;
        this.numberLength = 0;
        this.magnatude = undefined;
        this.negative = undefined;
        i--; // re-scan the terminating byte in the START state
      }
    } else if (this.tState === NUMBER3) { // * After digit (before period)
      n = buffer[i];
      this.numberLength++;
      if (n === 0x2e) { // .
        this.position = 0.1; this.tState = NUMBER4;
      } else if (n === 0x65 || n === 0x45) { // e/E
        this.exponent = 0; this.tState = NUMBER6;
      } else if (n >= 0x30 && n <= 0x39) { // 0-9
        this.magnatude = this.magnatude * 10 + n - 0x30;
      } else {
        this.tState = START;
        if (this.negative) {
          this.magnatude = -this.magnatude;
          this.negative = undefined;
        }
        this.onToken(NUMBER, this.magnatude);
        this.offset += this.numberLength - 1;
        this.numberLength = 0;
        this.magnatude = undefined;
        i--; // re-scan the terminating byte in the START state
      }
    } else if (this.tState === NUMBER4) { // After period
      n = buffer[i];
      this.numberLength++;
      if (n >= 0x30 && n <= 0x39) { // 0-9
        this.magnatude += this.position * (n - 0x30);
        this.position /= 10;
        this.tState = NUMBER5;
      } else { this.charError(buffer, i); }
    } else if (this.tState === NUMBER5) { // * After digit (after period)
      n = buffer[i];
      this.numberLength++;
      if (n >= 0x30 && n <= 0x39) { // 0-9
        this.magnatude += this.position * (n - 0x30);
        this.position /= 10;
      } else if (n === 0x65 || n === 0x45) { // E/e
        this.exponent = 0; this.tState = NUMBER6;
      } else {
        this.tState = START;
        if (this.negative) {
          this.magnatude = -this.magnatude;
          this.negative = undefined;
        }
        // The sign has already been applied above.
        this.onToken(NUMBER, this.magnatude);
        this.offset += this.numberLength - 1;
        this.numberLength = 0;
        this.magnatude = undefined;
        this.position = undefined;
        i--; // re-scan the terminating byte in the START state
      }
    } else if (this.tState === NUMBER6) { // After E
      n = buffer[i];
      this.numberLength++;
      if (n === 0x2b || n === 0x2d) { // +/-
        if (n === 0x2d) { this.negativeExponent = true; }
        this.tState = NUMBER7;
      } else if (n >= 0x30 && n <= 0x39) { // 0-9
        this.exponent = this.exponent * 10 + (n - 0x30);
        this.tState = NUMBER8;
      } else { this.charError(buffer, i); }
    } else if (this.tState === NUMBER7) { // After +/-
      n = buffer[i];
      this.numberLength++;
      if (n >= 0x30 && n <= 0x39) { // 0-9
        this.exponent = this.exponent * 10 + (n - 0x30);
        this.tState = NUMBER8;
      } else { this.charError(buffer, i); }
    } else if (this.tState === NUMBER8) { // * After digit (after +/-)
      n = buffer[i];
      this.numberLength++;
      if (n >= 0x30 && n <= 0x39) { // 0-9
        this.exponent = this.exponent * 10 + (n - 0x30);
      } else {
        if (this.negativeExponent) {
          this.exponent = -this.exponent;
          this.negativeExponent = undefined;
        }
        this.magnatude *= Math.pow(10, this.exponent);
        this.exponent = undefined;
        if (this.negative) {
          this.magnatude = -this.magnatude;
          this.negative = undefined;
        }
        this.tState = START;
        this.onToken(NUMBER, this.magnatude);
        this.offset += this.numberLength - 1;
        this.numberLength = 0;
        this.magnatude = undefined;
        i--; // re-scan the terminating byte in the START state
      }
    } else if (this.tState === TRUE1) { // r
      if (buffer[i] === 0x72) { this.tState = TRUE2; }
      else { this.charError(buffer, i); }
    } else if (this.tState === TRUE2) { // u
      if (buffer[i] === 0x75) { this.tState = TRUE3; }
      else { this.charError(buffer, i); }
    } else if (this.tState === TRUE3) { // e
      if (buffer[i] === 0x65) { this.tState = START; this.onToken(TRUE, true); this.offset += 3; }
      else { this.charError(buffer, i); }
    } else if (this.tState === FALSE1) { // a
      if (buffer[i] === 0x61) { this.tState = FALSE2; }
      else { this.charError(buffer, i); }
    } else if (this.tState === FALSE2) { // l
      if (buffer[i] === 0x6c) { this.tState = FALSE3; }
      else { this.charError(buffer, i); }
    } else if (this.tState === FALSE3) { // s
      if (buffer[i] === 0x73) { this.tState = FALSE4; }
      else { this.charError(buffer, i); }
    } else if (this.tState === FALSE4) { // e
      if (buffer[i] === 0x65) { this.tState = START; this.onToken(FALSE, false); this.offset += 4; }
      else { this.charError(buffer, i); }
    } else if (this.tState === NULL1) { // u
      if (buffer[i] === 0x75) { this.tState = NULL2; }
      else { this.charError(buffer, i); }
    } else if (this.tState === NULL2) { // l
      if (buffer[i] === 0x6c) { this.tState = NULL3; }
      else { this.charError(buffer, i); }
    } else if (this.tState === NULL3) { // l
      if (buffer[i] === 0x6c) { this.tState = START; this.onToken(NULL, null); this.offset += 3; }
      else { this.charError(buffer, i); }
    }
  }
};
329
/**
 * Placeholder token hook that does nothing.
 * A later assignment to `proto.onToken` in this file replaces it with the
 * parser's implementation.
 *
 * @param {number} token - Token code (one of the token constants in `C`).
 * @param {*} value - Value carried by the token.
 */
proto.onToken = function (token, value) {
  // Intentionally empty: override this to get events.
};
332
333
/**
 * Reports a token that is not valid in the current parser state through
 * `onError`.
 *
 * @param {number} token - Code of the unexpected token.
 * @param {*} value - Token value; it is appended to the message as
 *   "(<JSON>)" only when truthy.
 */
proto.parseError = function (token, value) {
  var tokenName = toknam(token);
  var detail = "";
  if (value) {
    detail = "(" + JSON.stringify(value) + ")";
  }
  var message = "Unexpected " + tokenName + detail + " in state " + toknam(this.state);
  this.onError(new Error(message));
};
336
/**
 * Saves the current container context (`value`, `key`, `mode`) on the stack
 * so that `pop` can restore it later.
 */
proto.push = function () {
  var frame = {
    value: this.value,
    key: this.key,
    mode: this.mode
  };
  this.stack.push(frame);
};
339
/**
 * Closes the current container: restores the context saved by `push`, then
 * emits the value that was just completed.
 */
proto.pop = function () {
  var finished = this.value;
  var frame = this.stack.pop();

  this.value = frame.value;
  this.key = frame.key;
  this.mode = frame.mode;

  this.emit(finished);

  // With no enclosing container the parser goes back to expecting a value.
  if (!this.mode) {
    this.state = VALUE;
  }
};
348
/**
 * Hands a completed value to `onValue`.
 * When a container mode is set, the parser state is first switched to
 * COMMA, the state that accepts a separator or the container's closer.
 *
 * @param {*} value - The value that was just completed.
 */
proto.emit = function (value) {
  var insideContainer = Boolean(this.mode);
  if (insideContainer) {
    this.state = COMMA;
  }
  this.onValue(value);
};
352
/**
 * Value hook called by `emit`; does nothing by default.
 *
 * @param {*} value - The value that was just completed.
 */
proto.onValue = function (value) {
  // Intentionally empty: override me.
};
355
/**
 * Parser half of the module: consumes one token and advances the parser
 * state, building nested objects/arrays in `this.value` and emitting each
 * completed value.
 *
 * `this.state` is one of VALUE or KEY, or reuses the COLON / COMMA token
 * codes to mean "expecting a colon" / "expecting a comma or closer".
 * Any token not allowed in the current state goes to `parseError`.
 *
 * @param {number} token - Token code produced by the tokenizer.
 * @param {*} value - Value carried by the token.
 */
proto.onToken = function (token, value) {
  var container;

  switch (this.state) {
  case VALUE:
    switch (token) {
    case STRING:
    case NUMBER:
    case TRUE:
    case FALSE:
    case NULL:
      // Scalar: store it in the enclosing container (if any) and emit it.
      if (this.value) {
        this.value[this.key] = value;
      }
      this.emit(value);
      break;

    case LEFT_BRACE:
      this.push();
      container = {};
      if (this.value) {
        this.value[this.key] = container;
      }
      this.value = container;
      this.key = undefined;
      this.state = KEY;
      this.mode = OBJECT;
      break;

    case LEFT_BRACKET:
      this.push();
      container = [];
      if (this.value) {
        this.value[this.key] = container;
      }
      this.value = container;
      this.key = 0;
      this.mode = ARRAY;
      this.state = VALUE;
      break;

    case RIGHT_BRACE:
      if (this.mode === OBJECT) {
        this.pop();
      } else {
        this.parseError(token, value);
      }
      break;

    case RIGHT_BRACKET:
      if (this.mode === ARRAY) {
        this.pop();
      } else {
        this.parseError(token, value);
      }
      break;

    default:
      this.parseError(token, value);
    }
    break;

  case KEY:
    if (token === STRING) {
      this.key = value;
      this.state = COLON;
    } else if (token === RIGHT_BRACE) {
      this.pop();
    } else {
      this.parseError(token, value);
    }
    break;

  case COLON:
    if (token === COLON) {
      this.state = VALUE;
    } else {
      this.parseError(token, value);
    }
    break;

  case COMMA:
    if (token === COMMA) {
      if (this.mode === ARRAY) {
        // Next element goes into the next index.
        this.key++;
        this.state = VALUE;
      } else if (this.mode === OBJECT) {
        this.state = KEY;
      }
    } else {
      var closesArray = token === RIGHT_BRACKET && this.mode === ARRAY;
      var closesObject = token === RIGHT_BRACE && this.mode === OBJECT;
      if (closesArray || closesObject) {
        this.pop();
      } else {
        this.parseError(token, value);
      }
    }
    break;

  default:
    this.parseError(token, value);
  }
};
424
425
// Attach the constants table to the constructor as `Parser.C`.
Parser.C = C;
426
427
// CommonJS export: the module's value is the Parser constructor.
module.exports = Parser;
428
429