Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
80684 views
1
var Tokenizer = require("./Tokenizer.js");
2
3
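/*
	Options:

	xmlMode: treat the input as XML (off by default). In this file it turns off
		the HTML-only behavior (implied close tags, void elements, lowercasing
		by default) and turns on self-closing tags and CDATA sections. The
		options object is also handed to the Tokenizer, where the special
		behavior for script/style tags lives.
	lowerCaseAttributeNames: call .toLowerCase for each attribute name (defaults to true unless xmlMode is set)
	lowerCaseTags: call .toLowerCase for each tag name (defaults to true unless xmlMode is set)
	recognizeSelfClosing: honor self-closing tags (`<tag/>`) even when xmlMode is off
	recognizeCDATA: report CDATA sections as CDATA even when xmlMode is off
		(otherwise they are reported as comments)
*/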
10
11
/*
	Callbacks (all optional):

	onattribute,
	oncdataend,
	oncdatastart,
	onclosetag,
	oncomment,
	oncommentend,
	onend,
	onerror,
	onopentag,
	onopentagname,
	onparserinit,
	onprocessinginstruction,
	onreset,
	ontext
*/
25
26
//Form-related elements. Opening one of the form controls listed in
//openImpliesClose closes any of these that is still on top of the stack.
//`__proto__: null` (as in voidElements) keeps inherited keys such as
//"constructor" or "toString" from matching when tag names are tested with `in`.
var formTags = {
	__proto__: null,
	input: true,
	option: true,
	optgroup: true,
	select: true,
	button: true,
	datalist: true,
	textarea: true
};

//Maps a tag name to the set of open elements that are closed implicitly
//when that tag is opened. Only consulted when xmlMode is off.
var openImpliesClose = {
	__proto__: null,
	tr      : { __proto__: null, tr:true, th:true, td:true },
	th      : { __proto__: null, th:true },
	td      : { __proto__: null, thead:true, td:true },
	body    : { __proto__: null, head:true, link:true, script:true },
	li      : { __proto__: null, li:true },
	p       : { __proto__: null, p:true },
	h1      : { __proto__: null, p:true },
	h2      : { __proto__: null, p:true },
	h3      : { __proto__: null, p:true },
	h4      : { __proto__: null, p:true },
	h5      : { __proto__: null, p:true },
	h6      : { __proto__: null, p:true },
	select  : formTags,
	input   : formTags,
	output  : formTags,
	button  : formTags,
	datalist: formTags,
	textarea: formTags,
	option  : { __proto__: null, option:true },
	optgroup: { __proto__: null, optgroup:true }
};
58
59
//Elements that never have content: outside xmlMode they are not pushed on
//the open-element stack and are reported as closed right after they open.
//The object has no prototype so `name in voidElements` only matches the
//keys listed here.
var voidElements = {
	__proto__: null,
	area: true,
	base: true,
	basefont: true,
	br: true,
	col: true,
	command: true,
	embed: true,
	frame: true,
	hr: true,
	img: true,
	input: true,
	isindex: true,
	keygen: true,
	link: true,
	meta: true,
	param: true,
	source: true,
	track: true,
	wbr: true,

	//common self closing svg elements
	path: true,
	circle: true,
	ellipse: true,
	line: true,
	rect: true,
	use: true,
	stop: true,
	polyline: true,
	polygon: true
};
92
93
//matches the first character after an instruction name: whitespace or a slash
var re_nameEnd = /[\s\/]/;
94
95
//Creates a parser that receives Tokenizer events and reports them through
//the callbacks in `cbs`.
//  cbs:     object whose optional function properties are the callbacks
//           (defaults to an empty object)
//  options: object with optional settings (defaults to an empty object);
//           the same object is handed to the Tokenizer
function Parser(cbs, options){
	this._options = options || {};
	this._cbs = cbs || {};

	//state of the tag/attribute currently being read
	this._tagname = "";
	this._attribname = "";
	this._attribvalue = "";
	this._attribs = null;
	//names of the elements that are currently open, outermost first
	this._stack = [];

	//endIndex stays null until the first token has been reported
	this.startIndex = 0;
	this.endIndex = null;

	//an explicitly passed option wins; otherwise names are lowercased
	//unless xmlMode is on
	var lowerCaseByDefault = !this._options.xmlMode;

	this._lowerCaseTagNames = "lowerCaseTags" in this._options ?
		!!this._options.lowerCaseTags :
		lowerCaseByDefault;
	this._lowerCaseAttributeNames = "lowerCaseAttributeNames" in this._options ?
		!!this._options.lowerCaseAttributeNames :
		lowerCaseByDefault;

	this._tokenizer = new Tokenizer(this._options, this);

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
}
119
120
//Make Parser.prototype inherit from Node's EventEmitter via util.inherits.
require("util").inherits(Parser, require("events").EventEmitter);
121
122
//Recomputes `startIndex` and `endIndex` for the token being reported.
//  initialOffset: subtracted from the tokenizer's `_sectionStart` to find
//                 the start of the very first token (presumably the length
//                 of that token's opening delimiter -- confirm against the
//                 Tokenizer). It is ignored for every later token.
Parser.prototype._updatePosition = function(initialOffset){
	if(this.endIndex === null){
		//first token: there is no previous end to continue from,
		//and the start may not go below 0
		var sectionStart = this._tokenizer._sectionStart;
		this.startIndex = sectionStart <= initialOffset ? 0 : sectionStart - initialOffset;
	} else {
		//later tokens start right after the previous one ended
		this.startIndex = this.endIndex + 1;
	}
	this.endIndex = this._tokenizer.getAbsoluteIndex();
};
133
134
//Tokenizer event handlers
135
//Tokenizer event: a run of text was read. Updates the position info and
//hands the text to the `ontext` callback, if there is one.
Parser.prototype.ontext = function(data){
	this._updatePosition(1);
	//for text the end index is moved back by one
	this.endIndex -= 1;

	var callbacks = this._cbs;
	if(callbacks.ontext){
		callbacks.ontext(data);
	}
};
141
142
//Tokenizer event: the name of an opening tag was read.
//Outside xmlMode this first closes the open elements that the new tag
//implicitly ends (see openImpliesClose), then records the tag on the stack
//unless it is a void element, and finally notifies `onopentagname`.
//Attributes are only collected when an `onopentag` callback exists.
Parser.prototype.onopentagname = function(name){
	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	this._tagname = name;

	//own-property checks so that tag names like "constructor" or "toString"
	//never match keys inherited from Object.prototype
	var hasOwn = Object.prototype.hasOwnProperty;

	if(!this._options.xmlMode && hasOwn.call(openImpliesClose, name)){
		var impliedClose = openImpliesClose[name];
		var top;
		//keep closing the innermost open element while the new tag implies its end;
		//an empty stack yields `undefined`, which is never a key
		while(hasOwn.call(impliedClose, (top = this._stack[this._stack.length - 1]))){
			this.onclosetag(top);
		}
	}

	if(this._options.xmlMode || !(name in voidElements)){
		this._stack.push(name);
	}

	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
	if(this._cbs.onopentag) this._attribs = {};
};
164
165
//Tokenizer event: the end of an opening tag was reached.
//Reports the tag together with the attributes collected for it. Outside
//xmlMode a void element is reported as closed straight away.
Parser.prototype.onopentagend = function(){
	this._updatePosition(1);

	if(this._attribs){
		if(this._cbs.onopentag){
			this._cbs.onopentag(this._tagname, this._attribs);
		}
		this._attribs = null;
	}

	var closesImmediately =
		!this._options.xmlMode &&
		this._cbs.onclosetag &&
		this._tagname in voidElements;

	if(closesImmediately){
		this._cbs.onclosetag(this._tagname);
	}

	this._tagname = "";
};
179
180
//Tokenizer event: a closing tag was read.
//If an element with that name is open, it and everything opened after it
//are closed. Outside xmlMode a closing tag without a matching open element
//is reported as an empty element for `</p>`, and for `</br>` as well when
//it reaches the final branch below; other unmatched closing tags are ignored.
Parser.prototype.onclosetag = function(name){
	this._updatePosition(1);

	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	var xmlMode = this._options.xmlMode;

	if(this._stack.length && (xmlMode || !(name in voidElements))){
		var pos = this._stack.lastIndexOf(name);
		if(pos !== -1){
			if(this._cbs.onclosetag){
				//pop and report the innermost elements first, down to
				//and including the matching one
				var count = this._stack.length - pos;
				while(count--) this._cbs.onclosetag(this._stack.pop());
			} else {
				//nobody is listening: just drop the closed elements
				this._stack.length = pos;
			}
		} else if(name === "p" && !xmlMode){
			//unmatched </p>: report an empty <p> element
			this.onopentagname(name);
			this._closeCurrentTag();
		}
	} else if(!xmlMode && (name === "br" || name === "p")){
		//reached with an empty stack or a void element name:
		//</br> and </p> are reported as an empty element
		this.onopentagname(name);
		this._closeCurrentTag();
	}
};
204
205
//Tokenizer event: a tag ended with "/>".
//In xmlMode, or when the recognizeSelfClosing option is set, the tag is
//closed right away; otherwise it is handled like a normal opening tag.
Parser.prototype.onselfclosingtag = function(){
	var honorSelfClosing = this._options.xmlMode || this._options.recognizeSelfClosing;

	if(!honorSelfClosing){
		this.onopentagend();
		return;
	}

	this._closeCurrentTag();
};
212
213
//Finishes the tag that is currently being opened and closes it again
//immediately if it ended up on top of the stack.
Parser.prototype._closeCurrentTag = function(){
	//remember the name before onopentagend runs
	var tagname = this._tagname;

	this.onopentagend();

	//a self-closing tag can only be the innermost open element, so looking
	//at the top of the stack is enough (cheaper than the search in onclosetag)
	if(this._stack[this._stack.length - 1] !== tagname){
		return;
	}

	if(this._cbs.onclosetag){
		this._cbs.onclosetag(tagname);
	}
	this._stack.pop();
};
227
228
//Tokenizer event: an attribute name was read. Stores it, lowercased when
//attribute names are being lowercased.
Parser.prototype.onattribname = function(name){
	this._attribname = this._lowerCaseAttributeNames ? name.toLowerCase() : name;
};
234
235
//Tokenizer event: a piece of the current attribute's value was read.
//Pieces are appended until onattribend is called.
Parser.prototype.onattribdata = function(value){
	this._attribvalue = this._attribvalue + value;
};
238
239
//Tokenizer event: the current attribute is complete.
//Reports it through `onattribute` and, while attributes are being collected
//for `onopentag`, stores it unless an attribute of that name was already
//stored for this tag. Then clears the name/value buffers.
Parser.prototype.onattribend = function(){
	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);

	//own-property check: names like "constructor" or "toString" are
	//inherited by the plain attribute object but count as "not set yet"
	var collecting = this._attribs &&
		!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname);

	if(collecting){
		this._attribs[this._attribname] = this._attribvalue;
	}

	this._attribname = "";
	this._attribvalue = "";
};
250
251
//Extracts the name of a declaration or processing instruction: everything
//up to the first whitespace or slash, or the whole value when there is
//neither. The name is lowercased when tag names are being lowercased.
//  value:   the instruction text without its leading "!" or "?"
//  returns: the instruction name
Parser.prototype._getInstructionName = function(value){
	var idx = value.search(re_nameEnd);
	var name = idx < 0 ? value : value.slice(0, idx);

	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	return name;
};
261
262
//Tokenizer event: a declaration ("<!...>") was read.
//It is reported through `onprocessinginstruction` with "!" put in front of
//both the name and the full text.
Parser.prototype.ondeclaration = function(value){
	if(!this._cbs.onprocessinginstruction){
		return;
	}

	var instructionName = this._getInstructionName(value);
	this._cbs.onprocessinginstruction("!" + instructionName, "!" + value);
};
268
269
//Tokenizer event: a processing instruction ("<?...>") was read.
//It is reported through the `onprocessinginstruction` callback with "?" put
//in front of both the name and the full text.
Parser.prototype.onprocessinginstruction = function(value){
	if(!this._cbs.onprocessinginstruction){
		return;
	}

	var instructionName = this._getInstructionName(value);
	this._cbs.onprocessinginstruction("?" + instructionName, "?" + value);
};
275
276
//Tokenizer event: a comment was read. Updates the position info, then
//calls `oncomment` with the comment text followed by `oncommentend`.
Parser.prototype.oncomment = function(value){
	this._updatePosition(4);

	if(this._cbs.oncomment){
		this._cbs.oncomment(value);
	}
	if(this._cbs.oncommentend){
		this._cbs.oncommentend();
	}
};
282
283
//Tokenizer event: a CDATA section was read.
//In xmlMode, or when the recognizeCDATA option is set, the content is
//reported as text wrapped in `oncdatastart`/`oncdataend`. Otherwise the
//section is reported as a comment with the text "[CDATA[...]]".
Parser.prototype.oncdata = function(value){
	this._updatePosition(1);

	if(this._options.xmlMode || this._options.recognizeCDATA){
		if(this._cbs.oncdatastart) this._cbs.oncdatastart();
		if(this._cbs.ontext) this._cbs.ontext(value);
		if(this._cbs.oncdataend) this._cbs.oncdataend();
	} else {
		//The comment callbacks are invoked directly: going through
		//this.oncomment would update the position a second time and leave
		//startIndex pointing past endIndex while the callbacks run.
		var commentText = "[CDATA[" + value + "]]";
		if(this._cbs.oncomment) this._cbs.oncomment(commentText);
		if(this._cbs.oncommentend) this._cbs.oncommentend();
	}
};
294
295
//Tokenizer event: an error occurred. Passes it on to the `onerror`
//callback; without one the error is dropped.
Parser.prototype.onerror = function(err){
	var callbacks = this._cbs;

	if(!callbacks.onerror){
		return;
	}
	callbacks.onerror(err);
};
298
299
//Tokenizer event: the input has ended.
//Every element that is still open is reported as closed, innermost first,
//and then `onend` is called. The stack itself is left untouched.
Parser.prototype.onend = function(){
	if(this._cbs.onclosetag){
		for(var i = this._stack.length - 1; i >= 0; i--){
			this._cbs.onclosetag(this._stack[i]);
		}
	}
	if(this._cbs.onend) this._cbs.onend();
};
309
310
311
//Resets the parser to a blank state, ready to parse a new HTML document.
//Calls `onreset` first, resets the tokenizer and all parser state, and
//finally calls `onparserinit` again with this parser.
Parser.prototype.reset = function(){
	if(this._cbs.onreset) this._cbs.onreset();
	this._tokenizer.reset();

	this._tagname = "";
	this._attribname = "";
	//drop a half-read attribute value so it cannot leak into the next document
	this._attribvalue = "";
	this._attribs = null;
	this._stack = [];

	//positions start over with the new input; a null endIndex tells
	//_updatePosition that no token has been reported yet
	this.startIndex = 0;
	this.endIndex = null;

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
};
323
324
//Parses one complete HTML document in a single call: the parser is reset
//first, then `data` is passed to end() as the only chunk.
Parser.prototype.parseComplete = function(data){
	var parser = this;

	parser.reset();
	parser.end(data);
};
329
330
//Feeds a chunk of input to the tokenizer.
Parser.prototype.write = function(chunk){
	var tokenizer = this._tokenizer;

	tokenizer.write(chunk);
};
333
334
//Signals the end of the input to the tokenizer, passing `chunk` along
//(parseComplete uses this to hand over a whole document).
Parser.prototype.end = function(chunk){
	var tokenizer = this._tokenizer;

	tokenizer.end(chunk);
};
337
338
//Forwards pause() to the tokenizer.
Parser.prototype.pause = function(){
	var tokenizer = this._tokenizer;

	tokenizer.pause();
};
341
342
//Forwards resume() to the tokenizer.
Parser.prototype.resume = function(){
	var tokenizer = this._tokenizer;

	tokenizer.resume();
};
345
346
//older method names, kept as aliases for backwards compatibility:
//parseChunk is write, done is end
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;
349
350
//the Parser constructor is the module's only export
module.exports = Parser;
351
352