// htmlparser2 / lib / Parser.js
// (vendored under react / wstein / node_modules / jest-cli / node_modules / jsdom / node_modules)

var Tokenizer = require("./Tokenizer.js");

/*
	Options (as read by this file):

	xmlMode: Disables the HTML-specific handling below (implied end tags, void
		elements, lenient </p> and </br>) and enables self-closing tags and
		CDATA sections. The options object is also handed to the Tokenizer.
	lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is falsy)
	lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is falsy)
	recognizeSelfClosing: treat `<foo/>` as self-closing even when xmlMode is off
	recognizeCDATA: report CDATA sections as CDATA even when xmlMode is off
*/

/*
	Callbacks (all optional, looked up on the `cbs` object):

	onattribute,
	oncdataend,
	oncdatastart,
	onclosetag,
	oncomment,
	oncommentend,
	onend,
	onerror,
	onopentag,
	onopentagname,
	onparserinit,
	onprocessinginstruction,
	onreset,
	ontext
*/

//All lookup tables below are queried with the `in` operator, so they are
//created without a prototype: otherwise inherited keys such as "constructor"
//would be reported as members.

//Open form-related elements that are implicitly closed by another form element
var formTags = {
	__proto__: null,
	input: true,
	option: true,
	optgroup: true,
	select: true,
	button: true,
	datalist: true,
	textarea: true
};

//Maps a tag name to the set of open elements that its start tag implicitly closes
var openImpliesClose = {
	__proto__: null,
	tr      : { __proto__: null, tr: true, th: true, td: true },
	th      : { __proto__: null, th: true },
	td      : { __proto__: null, thead: true, td: true },
	body    : { __proto__: null, head: true, link: true, script: true },
	li      : { __proto__: null, li: true },
	p       : { __proto__: null, p: true },
	h1      : { __proto__: null, p: true },
	h2      : { __proto__: null, p: true },
	h3      : { __proto__: null, p: true },
	h4      : { __proto__: null, p: true },
	h5      : { __proto__: null, p: true },
	h6      : { __proto__: null, p: true },
	select  : formTags,
	input   : formTags,
	output  : formTags,
	button  : formTags,
	datalist: formTags,
	textarea: formTags,
	option  : { __proto__: null, option: true },
	optgroup: { __proto__: null, optgroup: true }
};

//Elements that never get pushed onto the stack outside of xmlMode;
//their close event is emitted right after the open tag ends
var voidElements = {
	__proto__: null,
	area: true,
	base: true,
	basefont: true,
	br: true,
	col: true,
	command: true,
	embed: true,
	frame: true,
	hr: true,
	img: true,
	input: true,
	isindex: true,
	keygen: true,
	link: true,
	meta: true,
	param: true,
	source: true,
	track: true,
	wbr: true,

	//common self closing svg elements
	path: true,
	circle: true,
	ellipse: true,
	line: true,
	rect: true,
	use: true,
	stop: true,
	polyline: true,
	polygon: true
};

//First whitespace or slash ends the name of a declaration / processing instruction
var re_nameEnd = /\s|\//;

/**
 * Turns Tokenizer events into higher-level open/close/text/comment callbacks.
 *
 * @param {Object} [cbs] Handler object; see the callback list above.
 * @param {Object} [options] See the option list above.
 */
function Parser(cbs, options){
	this._options = options || {};
	this._cbs = cbs || {};

	this._tagname = "";
	this._attribname = "";
	this._attribvalue = "";
	this._attribs = null;
	this._stack = [];

	this.startIndex = 0;
	this.endIndex = null;

	//an explicitly passed option wins, otherwise names are lowercased unless in xmlMode
	this._lowerCaseTagNames = "lowerCaseTags" in this._options ?
		!!this._options.lowerCaseTags :
		!this._options.xmlMode;
	this._lowerCaseAttributeNames = "lowerCaseAttributeNames" in this._options ?
		!!this._options.lowerCaseAttributeNames :
		!this._options.xmlMode;

	this._tokenizer = new Tokenizer(this._options, this);

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
}

require("util").inherits(Parser, require("events").EventEmitter);

/**
 * Updates `startIndex` / `endIndex` for the token that was just reported.
 *
 * For the first token (endIndex is still null) the start is derived from the
 * tokenizer's section start minus `initialOffset`, clamped at 0; for every
 * later token it is the previous endIndex + 1.
 *
 * @param {number} initialOffset Number of characters to subtract from the
 *   tokenizer's section start (presumably the length of the token's opening
 *   delimiter - verify against the Tokenizer).
 */
Parser.prototype._updatePosition = function(initialOffset){
	if(this.endIndex === null){
		if(this._tokenizer._sectionStart <= initialOffset){
			this.startIndex = 0;
		} else {
			this.startIndex = this._tokenizer._sectionStart - initialOffset;
		}
	}
	else this.startIndex = this.endIndex + 1;
	this.endIndex = this._tokenizer.getAbsoluteIndex();
};

//Tokenizer event handlers

/** Forwards a text chunk to `cbs.ontext`. */
Parser.prototype.ontext = function(data){
	this._updatePosition(1);
	//NOTE(review): presumably the tokenizer index already points one past the text - confirm
	this.endIndex--;

	if(this._cbs.ontext) this._cbs.ontext(data);
};

/**
 * Handles the name of an opening tag: closes elements that the new tag
 * implicitly ends, pushes the tag onto the stack (unless it is a void
 * element in HTML mode) and starts collecting attributes.
 */
Parser.prototype.onopentagname = function(name){
	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	this._tagname = name;

	if(!this._options.xmlMode && name in openImpliesClose) {
		//close the top of the stack for as long as it is implicitly ended by `name`
		for(
			var el;
			(el = this._stack[this._stack.length - 1]) in openImpliesClose[name];
			this.onclosetag(el)
		);
	}

	if(this._options.xmlMode || !(name in voidElements)){
		this._stack.push(name);
	}

	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
	//attributes are only collected when somebody listens for onopentag
	if(this._cbs.onopentag) this._attribs = {};
};

/**
 * Handles the end of an opening tag: emits `onopentag` with the collected
 * attributes and, for void elements in HTML mode, an immediate `onclosetag`.
 */
Parser.prototype.onopentagend = function(){
	this._updatePosition(1);

	if(this._attribs){
		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
		this._attribs = null;
	}

	if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
		this._cbs.onclosetag(this._tagname);
	}

	this._tagname = "";
};

/**
 * Handles a closing tag. Pops (and reports) every element above and
 * including the closest matching open element. In HTML mode a stray `</p>`
 * or `</br>` is turned into an empty element instead of being ignored.
 */
Parser.prototype.onclosetag = function(name){
	this._updatePosition(1);

	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
		var pos = this._stack.lastIndexOf(name);
		if(pos !== -1){
			if(this._cbs.onclosetag){
				//number of elements to pop, innermost first
				pos = this._stack.length - pos;
				while(pos--) this._cbs.onclosetag(this._stack.pop());
			}
			else this._stack.length = pos;
		} else if(name === "p" && !this._options.xmlMode){
			this.onopentagname(name);
			this._closeCurrentTag();
		}
	} else if(!this._options.xmlMode && (name === "br" || name === "p")){
		this.onopentagname(name);
		this._closeCurrentTag();
	}
};

/**
 * Handles `/>`. Only treated as self-closing in xmlMode or when
 * `recognizeSelfClosing` is set; otherwise it is a regular tag end.
 */
Parser.prototype.onselfclosingtag = function(){
	if(this._options.xmlMode || this._options.recognizeSelfClosing){
		this._closeCurrentTag();
	} else {
		this.onopentagend();
	}
};

/** Finishes the current opening tag and immediately closes it again. */
Parser.prototype._closeCurrentTag = function(){
	//onopentagend clears this._tagname, so remember it first
	var name = this._tagname;

	this.onopentagend();

	//self-closing tags will be on the top of the stack
	//(cheaper check than in onclosetag)
	if(this._stack[this._stack.length - 1] === name){
		if(this._cbs.onclosetag){
			this._cbs.onclosetag(name);
		}
		this._stack.pop();
	}
};

/** Stores the (optionally lowercased) name of the attribute being parsed. */
Parser.prototype.onattribname = function(name){
	if(this._lowerCaseAttributeNames){
		name = name.toLowerCase();
	}
	this._attribname = name;
};

/** Appends a chunk to the value of the attribute being parsed. */
Parser.prototype.onattribdata = function(value){
	this._attribvalue += value;
};

/**
 * Finishes the current attribute: reports it via `onattribute` and stores it
 * for `onopentag`. The first occurrence of a duplicated attribute wins.
 */
Parser.prototype.onattribend = function(){
	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);
	if(
		this._attribs &&
		!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
	){
		this._attribs[this._attribname] = this._attribvalue;
	}
	this._attribname = "";
	this._attribvalue = "";
};

/**
 * Extracts the name of a declaration / processing instruction: everything up
 * to the first whitespace or slash, lowercased if tag names are lowercased.
 *
 * @param {string} value Raw content of the instruction.
 * @returns {string} The instruction name.
 */
Parser.prototype._getInstructionName = function(value){
	var idx = value.search(re_nameEnd),
	    name = idx < 0 ? value : value.substr(0, idx);

	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	return name;
};

/** Reports a declaration as a processing instruction prefixed with "!". */
Parser.prototype.ondeclaration = function(value){
	if(this._cbs.onprocessinginstruction){
		var name = this._getInstructionName(value);
		this._cbs.onprocessinginstruction("!" + name, "!" + value);
	}
};

/** Reports a processing instruction, prefixed with "?". */
Parser.prototype.onprocessinginstruction = function(value){
	if(this._cbs.onprocessinginstruction){
		var name = this._getInstructionName(value);
		this._cbs.onprocessinginstruction("?" + name, "?" + value);
	}
};

/** Reports a comment via `oncomment` followed by `oncommentend`. */
Parser.prototype.oncomment = function(value){
	this._updatePosition(4);

	if(this._cbs.oncomment) this._cbs.oncomment(value);
	if(this._cbs.oncommentend) this._cbs.oncommentend();
};

/**
 * Reports a CDATA section. In xmlMode (or with `recognizeCDATA`) it is
 * emitted as oncdatastart / ontext / oncdataend, otherwise as a comment.
 */
Parser.prototype.oncdata = function(value){
	this._updatePosition(1);

	if(this._options.xmlMode || this._options.recognizeCDATA){
		if(this._cbs.oncdatastart) this._cbs.oncdatastart();
		if(this._cbs.ontext) this._cbs.ontext(value);
		if(this._cbs.oncdataend) this._cbs.oncdataend();
	} else {
		//NOTE(review): oncomment updates the position a second time - confirm this is intended
		this.oncomment("[CDATA[" + value + "]]");
	}
};

/** Forwards an error to `cbs.onerror`. */
Parser.prototype.onerror = function(err){
	if(this._cbs.onerror) this._cbs.onerror(err);
};

/** Reports every still-open element as closed (innermost first), then `onend`. */
Parser.prototype.onend = function(){
	if(this._cbs.onclosetag){
		for(
			var i = this._stack.length;
			i > 0;
			this._cbs.onclosetag(this._stack[--i])
		);
	}
	if(this._cbs.onend) this._cbs.onend();
};


//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function(){
	if(this._cbs.onreset) this._cbs.onreset();
	this._tokenizer.reset();

	this._tagname = "";
	this._attribname = "";
	this._attribvalue = "";
	this._attribs = null;
	this._stack = [];

	//restore the position state set up by the constructor so the next
	//document is not measured from the previous document's endIndex
	this.startIndex = 0;
	this.endIndex = null;

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
};

//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function(data){
	this.reset();
	this.end(data);
};

/** Feeds a chunk of input to the tokenizer. */
Parser.prototype.write = function(chunk){
	this._tokenizer.write(chunk);
};

/** Signals the end of input to the tokenizer, with an optional final chunk. */
Parser.prototype.end = function(chunk){
	this._tokenizer.end(chunk);
};

/** Pauses the tokenizer. */
Parser.prototype.pause = function(){
	this._tokenizer.pause();
};

/** Resumes the tokenizer. */
Parser.prototype.resume = function(){
	this._tokenizer.resume();
};

//alias for backwards compat
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;

module.exports = Parser;