// Source path: react/wstein/node_modules/browserify/node_modules/module-deps/node_modules/detective/node_modules/acorn/src/tokenize.js
import {isIdentifierStart, isIdentifierChar} from "./identifier"
import {types as tt, keywords as keywordTypes} from "./tokentype"
import {Parser} from "./state"
import {SourceLocation} from "./location"
import {lineBreak, lineBreakG, isNewLine, nonASCIIwhitespace} from "./whitespace"

// Snapshot of the parser's current token. Tokens normally live as
// plain properties on the parser object; this wrapper is only built
// for the `onToken` callback and for callers of `getToken`.

export class Token {
  // Copies `type`, `value`, `start` and `end` from parser `p`. `loc`
  // and `range` are attached only when the matching option is set.
  constructor(p) {
    this.type = p.type
    this.value = p.value
    this.start = p.start
    this.end = p.end
    if (p.options.locations) {
      this.loc = new SourceLocation(p, p.startLoc, p.endLoc)
    }
    if (p.options.ranges) {
      this.range = [p.start, p.end]
    }
  }
}

// ## Tokenizer

const pp = Parser.prototype

// Are we running under Rhino?
const isRhino = typeof Packages !== "undefined"

// Advance to the next token. The current token is first reported to
// `options.onToken` (when set) and its start/end are remembered as
// the "last token" positions.

pp.next = function() {
  if (this.options.onToken) {
    this.options.onToken(new Token(this))
  }

  this.lastTokStart = this.start
  this.lastTokEnd = this.end
  this.lastTokStartLoc = this.startLoc
  this.lastTokEndLoc = this.endLoc
  this.nextToken()
}

// Advance and return the new current token as a `Token` object.

pp.getToken = function() {
  this.next()
  return new Token(this)
}

// If we're in an ES6 environment, make parsers iterable
if (typeof Symbol !== "undefined") {
  pp[Symbol.iterator] = function () {
    // The arrow function keeps `this` bound to the parser.
    return {
      next: () => {
        const token = this.getToken()
        return {
          done: token.type === tt.eof,
          value: token
        }
      }
    }
  }
}

// Toggle strict mode.
// Re-reads the next number or string to please
// pedantic tests (`"use strict"; 010;` should fail).

pp.setStrict = function(strict) {
  this.strict = strict
  if (this.type !== tt.num && this.type !== tt.string) return
  // Rewind to the start of the current token so that it is tokenized
  // again under the new strictness setting.
  this.pos = this.start
  if (this.options.locations) {
    // Walk the line bookkeeping back until it covers the rewound position.
    while (this.pos < this.lineStart) {
      this.lineStart = this.input.lastIndexOf("\n", this.lineStart - 2) + 1
      --this.curLine
    }
  }
  this.nextToken()
}

// Returns the innermost (last) entry of the context stack, or
// `undefined` when the stack is empty.

pp.curContext = function() {
  return this.context[this.context.length - 1]
}

// Read a single token, updating the parser object's token-related
// properties.

pp.nextToken = function() {
  let curContext = this.curContext()
  if (!curContext || !curContext.preserveSpace) this.skipSpace()

  this.start = this.pos
  if (this.options.locations) this.startLoc = this.curPosition()
  if (this.pos >= this.input.length) return this.finishToken(tt.eof)

  // `curContext` may be undefined (the check above already allows for
  // that), so guard before looking for an override.
  if (curContext && curContext.override) return curContext.override(this)
  else this.readToken(this.fullCharCodeAtPos())
}

// Dispatch on the code point at the current position: identifier
// starts go to `readWord`, everything else to `getTokenFromCode`.

pp.readToken = function(code) {
  // Identifier or keyword. '\uXXXX' sequences are allowed in
  // identifiers, so '\' also dispatches to that.
  if (isIdentifierStart(code, this.options.ecmaVersion >= 6) || code === 92 /* '\' */)
    return this.readWord()

  return this.getTokenFromCode(code)
}

// Returns the full code point at the current position, combining a
// well-formed UTF-16 surrogate pair into a single astral code point.
// Unpaired surrogates are returned unchanged.

pp.fullCharCodeAtPos = function() {
  let code = this.input.charCodeAt(this.pos)
  // Only a high (lead) surrogate can start a pair.
  if (code < 0xd800 || code > 0xdbff) return code
  let next = this.input.charCodeAt(this.pos + 1)
  // A lead surrogate without a trailing low surrogate (including at
  // the very end of the input, where `next` is NaN) stands alone.
  if (!(next >= 0xdc00 && next <= 0xdfff)) return code
  // Same as ((code - 0xd800) << 10) + (next - 0xdc00) + 0x10000
  return (code << 10) + next - 0x35fdc00
}

// Skips a `/* ... */` comment starting at the current position,
// keeping line bookkeeping up to date and reporting the comment to
// `options.onComment` when that callback is set.

pp.skipBlockComment = function() {
  let startLoc = this.options.onComment && this.options.locations && this.curPosition()
  let start = this.pos, end = this.input.indexOf("*/", this.pos += 2)
  if (end === -1) this.raise(this.pos - 2, "Unterminated comment")
  this.pos = end + 2
  if (this.options.locations) {
    // Count every line break inside the comment.
    lineBreakG.lastIndex = start
    let match
    while ((match = lineBreakG.exec(this.input)) && match.index < this.pos) {
      ++this.curLine
      this.lineStart = match.index + match[0].length
    }
  }
  if (this.options.onComment)
    this.options.onComment(true, this.input.slice(start + 2, end), start, this.pos,
                           startLoc, this.options.locations && this.curPosition())
}

// Skips a line comment whose opening marker is `startSkip` characters
// long, stopping in front of the line terminator (or at the end of
// the input), and reports it to `options.onComment` when set.

pp.skipLineComment = function(startSkip) {
  let start = this.pos
  let startLoc = this.options.onComment && this.options.locations && this.curPosition()
  let ch = this.input.charCodeAt(this.pos+=startSkip)
  // 10 = '\n', 13 = '\r', 8232 / 8233 = U+2028 / U+2029
  while (this.pos < this.input.length && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) {
    ++this.pos
    ch = this.input.charCodeAt(this.pos)
  }
  if (this.options.onComment)
    this.options.onComment(false, this.input.slice(start + startSkip, this.pos), start, this.pos,
                           startLoc, this.options.locations && this.curPosition())
}

// Called at the start of the parse and after every token.
// Skips whitespace and comments.

pp.skipSpace = function() {
  scan: while (this.pos < this.input.length) {
    const ch = this.input.charCodeAt(this.pos)
    switch (ch) {
      case 32: // ' '
      case 160: // '\xa0'
        ++this.pos
        break

      case 13: // '\r', possibly the first half of '\r\n'
        ++this.pos
        if (this.input.charCodeAt(this.pos) === 10) {
          ++this.pos
        }
        if (this.options.locations) {
          ++this.curLine
          this.lineStart = this.pos
        }
        break

      case 10: // '\n'
      case 8232: // U+2028
      case 8233: // U+2029
        ++this.pos
        if (this.options.locations) {
          ++this.curLine
          this.lineStart = this.pos
        }
        break

      case 47: { // '/'
        const following = this.input.charCodeAt(this.pos + 1)
        if (following === 42) { // '*'
          this.skipBlockComment()
        } else if (following === 47) { // '/'
          this.skipLineComment(2)
        } else {
          // A lone slash is a real token, not a comment.
          break scan
        }
        break
      }

      default: {
        // Codes 9, 11 and 12 (10 and 13 were handled above), or
        // anything matched by the non-ASCII whitespace pattern.
        const isOtherSpace = (ch > 8 && ch < 14) ||
          (ch >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(ch)))
        if (!isOtherSpace) break scan
        ++this.pos
      }
    }
  }
}

// Called at the end of every token. Records the token's end position
// (and end location when `options.locations` is set), stores its
// type and value, and hands the previous token type to
// `updateContext`.

pp.finishToken = function(type, val) {
  const previousType = this.type
  this.end = this.pos
  if (this.options.locations) {
    this.endLoc = this.curPosition()
  }
  this.type = type
  this.value = val

  this.updateContext(previousType)
}

// ### Token reading

// This is the function that is called to fetch the next token.
// It is somewhat obscure, because it works in character codes rather
// than characters, and because operator parsing has been inlined
// into it.
//
// All in the name of speed.
//
// Reads a token starting with '.': a number such as `.5`, the `...`
// ellipsis (ES6 and up), or a plain dot.
pp.readToken_dot = function() {
  let next = this.input.charCodeAt(this.pos + 1)
  if (next >= 48 && next <= 57) return this.readNumber(true)
  let next2 = this.input.charCodeAt(this.pos + 2)
  if (this.options.ecmaVersion >= 6 && next === 46 && next2 === 46) { // 46 = dot '.'
    this.pos += 3
    return this.finishToken(tt.ellipsis)
  } else {
    ++this.pos
    return this.finishToken(tt.dot)
  }
}

// Reads a token starting with '/': a regular expression when an
// expression is allowed here, otherwise `/=` or `/`.
pp.readToken_slash = function() { // '/'
  let next = this.input.charCodeAt(this.pos + 1)
  if (this.exprAllowed) {++this.pos; return this.readRegexp();}
  if (next === 61) return this.finishOp(tt.assign, 2)
  return this.finishOp(tt.slash, 1)
}

// Reads `%`, `*`, `%=` or `*=`.
pp.readToken_mult_modulo = function(code) { // '%*'
  let next = this.input.charCodeAt(this.pos + 1)
  if (next === 61) return this.finishOp(tt.assign, 2)
  return this.finishOp(code === 42 ? tt.star : tt.modulo, 1)
}

// Reads `|`, `&`, `||`, `&&`, `|=` or `&=`.
pp.readToken_pipe_amp = function(code) { // '|&'
  let next = this.input.charCodeAt(this.pos + 1)
  if (next === code) return this.finishOp(code === 124 ? tt.logicalOR : tt.logicalAND, 2)
  if (next === 61) return this.finishOp(tt.assign, 2)
  return this.finishOp(code === 124 ? tt.bitwiseOR : tt.bitwiseAND, 1)
}

// Reads `^` or `^=`.
pp.readToken_caret = function() { // '^'
  let next = this.input.charCodeAt(this.pos + 1)
  if (next === 61) return this.finishOp(tt.assign, 2)
  return this.finishOp(tt.bitwiseXOR, 1)
}

// Reads `+`, `-`, `++`, `--`, `+=` or `-=`. A `-->` that has a line
// break between it and the previous token is skipped as a comment.
pp.readToken_plus_min = function(code) { // '+-'
  let next = this.input.charCodeAt(this.pos + 1)
  if (next === code) {
    if (next === 45 && this.input.charCodeAt(this.pos + 2) === 62 &&
        lineBreak.test(this.input.slice(this.lastTokEnd, this.pos))) {
      // A `-->` line comment
      this.skipLineComment(3)
      this.skipSpace()
      return this.nextToken()
    }
    return this.finishOp(tt.incDec, 2)
  }
  if (next === 61) return this.finishOp(tt.assign, 2)
  return this.finishOp(tt.plusMin, 1)
}

// Reads `<`, `>`, `<=`, `>=`, the shift operators and their
// assignment forms. An opening `<!--` is skipped as a line comment
// (after calling `unexpected` when `inModule` is set).
pp.readToken_lt_gt = function(code) { // '<>'
  let next = this.input.charCodeAt(this.pos + 1)
  let size = 1
  if (next === code) {
    // `<<`, `>>` or `>>>`, optionally followed by '='
    size = code === 62 && this.input.charCodeAt(this.pos + 2) === 62 ? 3 : 2
    if (this.input.charCodeAt(this.pos + size) === 61) return this.finishOp(tt.assign, size + 1)
    return this.finishOp(tt.bitShift, size)
  }
  if (next === 33 && code === 60 && this.input.charCodeAt(this.pos + 2) === 45 &&
      this.input.charCodeAt(this.pos + 3) === 45) {
    if (this.inModule) this.unexpected()
    // `<!--`, an XML-style comment that should be interpreted as a line comment
    this.skipLineComment(4)
    this.skipSpace()
    return this.nextToken()
  }
  // `<=` and `>=` are exactly two characters wide. A further '=' is
  // left for the next token instead of being folded into a bogus
  // `<==` / `>==` operator.
  if (next === 61) size = 2
  return this.finishOp(tt.relational, size)
}

// Reads `=`, `!`, `==`, `!=`, `===`, `!==`, or `=>` (ES6 and up).
pp.readToken_eq_excl = function(code) { // '=!'
  let next = this.input.charCodeAt(this.pos + 1)
  if (next === 61) return this.finishOp(tt.equality, this.input.charCodeAt(this.pos + 2) === 61 ? 3 : 2)
  if (code === 61 && next === 62 && this.options.ecmaVersion >= 6) { // '=>'
    this.pos += 2
    return this.finishToken(tt.arrow)
  }
  return this.finishOp(code === 61 ? tt.eq : tt.prefix, 1)
}

// Reads the token that starts with character code `code`. Raises
// "Unexpected character" when no token can start with it.
pp.getTokenFromCode = function(code) {
  switch (code) {
    // The interpretation of a dot depends on whether it is followed
    // by a digit or another two dots.
  case 46: // '.'
    return this.readToken_dot()

    // Punctuation tokens.
  case 40: ++this.pos; return this.finishToken(tt.parenL)
  case 41: ++this.pos; return this.finishToken(tt.parenR)
  case 59: ++this.pos; return this.finishToken(tt.semi)
  case 44: ++this.pos; return this.finishToken(tt.comma)
  case 91: ++this.pos; return this.finishToken(tt.bracketL)
  case 93: ++this.pos; return this.finishToken(tt.bracketR)
  case 123: ++this.pos; return this.finishToken(tt.braceL)
  case 125: ++this.pos; return this.finishToken(tt.braceR)
  case 58: ++this.pos; return this.finishToken(tt.colon)
  case 63: ++this.pos; return this.finishToken(tt.question)

  case 96: // '`'
    // Before ES6 a backtick is not a token; fall out to the error below.
    if (this.options.ecmaVersion < 6) break
    ++this.pos
    return this.finishToken(tt.backQuote)

  case 48: // '0'
    let next = this.input.charCodeAt(this.pos + 1)
    if (next === 120 || next === 88) return this.readRadixNumber(16); // '0x', '0X' - hex number
    if (this.options.ecmaVersion >= 6) {
      if (next === 111 || next === 79) return this.readRadixNumber(8); // '0o', '0O' - octal number
      if (next === 98 || next === 66) return this.readRadixNumber(2); // '0b', '0B' - binary number
    }
    // Anything else beginning with a digit is an integer, octal
    // number, or float. (Deliberate fall-through.)
  case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: // 1-9
    return this.readNumber(false)

    // Quotes produce strings.
  case 34: case 39: // '"', "'"
    return this.readString(code)

    // Operators are parsed inline in tiny state machines. '=' (61) is
    // often referred to. `finishOp` simply skips the amount of
    // characters it is given as second argument, and returns a token
    // of the type given by its first argument.

  case 47: // '/'
    return this.readToken_slash()

  case 37: case 42: // '%*'
    return this.readToken_mult_modulo(code)

  case 124: case 38: // '|&'
    return this.readToken_pipe_amp(code)

  case 94: // '^'
    return this.readToken_caret()

  case 43: case 45: // '+-'
    return this.readToken_plus_min(code)

  case 60: case 62: // '<>'
    return this.readToken_lt_gt(code)

  case 61: case 33: // '=!'
    return this.readToken_eq_excl(code)

  case 126: // '~'
    return this.finishOp(tt.prefix, 1)
  }

  this.raise(this.pos, "Unexpected character '" + codePointToString(code) + "'")
}

// Finishes an operator token of `size` characters; the operator text
// itself becomes the token value.
pp.finishOp = function(type, size) {
  let str = this.input.slice(this.pos, this.pos + size)
  this.pos += size
  return this.finishToken(type, str)
}

// Whether the host's RegExp constructor accepts the "u" flag.
var regexpUnicodeSupport = false
try { new RegExp("\uffff", "u"); regexpUnicodeSupport = true }
catch(e) {}

// Parse a regular expression.
// Some context-awareness is necessary,
// since a '/' inside a '[]' set does not end the expression.

pp.readRegexp = function() {
  let escaped, inClass, start = this.pos
  for (;;) {
    if (this.pos >= this.input.length) this.raise(start, "Unterminated regular expression")
    let ch = this.input.charAt(this.pos)
    if (lineBreak.test(ch)) this.raise(start, "Unterminated regular expression")
    if (!escaped) {
      if (ch === "[") inClass = true
      else if (ch === "]" && inClass) inClass = false
      else if (ch === "/" && !inClass) break
      escaped = ch === "\\"
    } else escaped = false
    ++this.pos
  }
  let content = this.input.slice(start, this.pos)
  ++this.pos
  // Need to use `readWord1` because '\uXXXX' sequences are allowed
  // here (don't ask).
  let mods = this.readWord1()
  let tmp = content
  if (mods) {
    let validFlags = /^[gmsiy]*$/
    if (this.options.ecmaVersion >= 6) validFlags = /^[gmsiyu]*$/
    if (!validFlags.test(mods)) this.raise(start, "Invalid regular expression flag")
    if (mods.indexOf('u') >= 0 && !regexpUnicodeSupport) {
      // Replace each astral symbol and every Unicode escape sequence that
      // possibly represents an astral symbol or a paired surrogate with a
      // single ASCII symbol to avoid throwing on regular expressions that
      // are only valid in combination with the `/u` flag.
      // Note: replacing with the ASCII symbol `x` might cause false
      // negatives in unlikely scenarios. For example, `[\u{61}-b]` is a
      // perfectly valid pattern that is equivalent to `[a-b]`, but it would
      // be replaced by `[x-b]` which throws an error.
      tmp = tmp.replace(/\\u([a-fA-F0-9]{4})|\\u\{([0-9a-fA-F]+)\}|[\uD800-\uDBFF][\uDC00-\uDFFF]/g, "x")
    }
  }
  // Detect invalid regular expressions.
  let value = null
  // Rhino's regular expression parser is flaky and throws uncatchable exceptions,
  // so don't do detection if we are running under Rhino
  if (!isRhino) {
    try {
      new RegExp(tmp)
    } catch (e) {
      if (e instanceof SyntaxError) this.raise(start, "Error parsing regular expression: " + e.message)
      // `raise` takes a position and a message, so any other kind of
      // error is rethrown untouched rather than passed to it.
      throw e
    }
    // Get a regular expression object for this pattern-flag pair, or `null` in
    // case the current environment doesn't support the flags it uses.
    try {
      value = new RegExp(content, mods)
    } catch (err) {
      // Deliberately ignored: `value` stays `null` for unsupported flags.
    }
  }
  return this.finishToken(tt.regexp, {pattern: content, flags: mods, value: value})
}

// Read an integer in the given radix. Return null if zero digits
// were read, the integer value otherwise. When `len` is given, this
// will return `null` unless the integer has exactly `len` digits.

pp.readInt = function(radix, len) {
  let start = this.pos, total = 0
  for (let i = 0, e = len == null ? Infinity : len; i < e; ++i) {
    let code = this.input.charCodeAt(this.pos), val
    if (code >= 97) val = code - 97 + 10; // a
    else if (code >= 65) val = code - 65 + 10; // A
    else if (code >= 48 && code <= 57) val = code - 48; // 0-9
    else val = Infinity
    // Stop at the first character that is not a digit in this radix.
    if (val >= radix) break
    ++this.pos
    total = total * radix + val
  }
  if (this.pos === start || len != null && this.pos - start !== len) return null

  return total
}

// Read a number with a two-character radix prefix (`0x`, `0o`, `0b`).

pp.readRadixNumber = function(radix) {
  this.pos += 2; // 0x
  let val = this.readInt(radix)
  if (val == null) this.raise(this.start + 2, "Expected number in radix " + radix)
  if (isIdentifierStart(this.fullCharCodeAtPos())) this.raise(this.pos, "Identifier directly after number")
  return this.finishToken(tt.num, val)
}

// Read an integer, octal integer, or floating-point number.

pp.readNumber = function(startsWithDot) {
  let start = this.pos, isFloat = false, octal = this.input.charCodeAt(this.pos) === 48
  if (!startsWithDot && this.readInt(10) === null) this.raise(start, "Invalid number")
  if (this.input.charCodeAt(this.pos) === 46) { // '.'
    ++this.pos
    this.readInt(10)
    isFloat = true
  }
  let next = this.input.charCodeAt(this.pos)
  if (next === 69 || next === 101) { // 'eE'
    next = this.input.charCodeAt(++this.pos)
    if (next === 43 || next === 45) ++this.pos; // '+-'
    if (this.readInt(10) === null) this.raise(start, "Invalid number")
    isFloat = true
  }
  if (isIdentifierStart(this.fullCharCodeAtPos())) this.raise(this.pos, "Identifier directly after number")

  let str = this.input.slice(start, this.pos), val
  if (isFloat) val = parseFloat(str)
  else if (!octal || str.length === 1) val = parseInt(str, 10)
  // A leading zero followed by more digits is a legacy octal literal:
  // rejected in strict mode or when it contains an 8 or 9.
  else if (/[89]/.test(str) || this.strict) this.raise(start, "Invalid number")
  else val = parseInt(str, 8)
  return this.finishToken(tt.num, val)
}

// Read the code point of a '\u' escape, with the position just past
// the 'u': either the braced `{...}` form (ES6 and up) or exactly
// four hex digits.

pp.readCodePoint = function() {
  let ch = this.input.charCodeAt(this.pos), code

  if (ch === 123) { // '{'
    if (this.options.ecmaVersion < 6) this.unexpected()
    ++this.pos
    // Every character up to the closing '}' must be a hex digit.
    code = this.readHexChar(this.input.indexOf('}', this.pos) - this.pos)
    ++this.pos
    if (code > 0x10FFFF) this.unexpected()
  } else {
    code = this.readHexChar(4)
  }
  return code
}

// Turn a code point into a string, using a surrogate pair for code
// points above 0xFFFF.

function codePointToString(code) {
  // UTF-16 Decoding
  if (code <= 0xFFFF) return String.fromCharCode(code)
  return String.fromCharCode(((code - 0x10000) >> 10) + 0xD800,
                             ((code - 0x10000) & 1023) + 0xDC00)
}

// Read a string value, interpreting backslash-escapes.

pp.readString = function(quote) {
  let out = "", chunkStart = ++this.pos
  for (;;) {
    if (this.pos >= this.input.length) this.raise(this.start, "Unterminated string constant")
    let ch = this.input.charCodeAt(this.pos)
    if (ch === quote) break
    if (ch === 92) { // '\'
      out += this.input.slice(chunkStart, this.pos)
      out += this.readEscapedChar()
      chunkStart = this.pos
    } else {
      if (isNewLine(ch)) this.raise(this.start, "Unterminated string constant")
      ++this.pos
    }
  }
  // Append the last unescaped run and step over the closing quote.
  out += this.input.slice(chunkStart, this.pos++)
  return this.finishToken(tt.string, out)
}

// Reads template string tokens.

pp.readTmplToken = function() {
  let out = "", chunkStart = this.pos
  for (;;) {
    if (this.pos >= this.input.length) this.raise(this.start, "Unterminated template")
    let ch = this.input.charCodeAt(this.pos)
    if (ch === 96 || ch === 36 && this.input.charCodeAt(this.pos + 1) === 123) { // '`', '${'
      // Directly after a template chunk, the delimiter itself is the token.
      if (this.pos === this.start && this.type === tt.template) {
        if (ch === 36) {
          this.pos += 2
          return this.finishToken(tt.dollarBraceL)
        } else {
          ++this.pos
          return this.finishToken(tt.backQuote)
        }
      }
      out += this.input.slice(chunkStart, this.pos)
      return this.finishToken(tt.template, out)
    }
    if (ch === 92) { // '\'
      out += this.input.slice(chunkStart, this.pos)
      out += this.readEscapedChar()
      chunkStart = this.pos
    } else if (isNewLine(ch)) {
      out += this.input.slice(chunkStart, this.pos)
      ++this.pos
      if (ch === 13 && this.input.charCodeAt(this.pos) === 10) {
        // '\r\n' contributes a single '\n' to the value.
        ++this.pos
        out += "\n"
      } else {
        out += String.fromCharCode(ch)
      }
      if (this.options.locations) {
        ++this.curLine
        this.lineStart = this.pos
      }
      chunkStart = this.pos
    } else {
      ++this.pos
    }
  }
}

// Used to read escaped characters

pp.readEscapedChar = function() {
  let ch = this.input.charCodeAt(++this.pos)
  // Up to three octal digits, trimmed until the value fits in 0-255.
  let octal = /^[0-7]+/.exec(this.input.slice(this.pos, this.pos + 3))
  if (octal) octal = octal[0]
  while (octal && parseInt(octal, 8) > 255) octal = octal.slice(0, -1)
  // A lone "0" is the '\0' escape handled in the switch below.
  if (octal === "0") octal = null
  ++this.pos
  if (octal) {
    if (this.strict) this.raise(this.pos - 2, "Octal literal in strict mode")
    this.pos += octal.length - 1
    return String.fromCharCode(parseInt(octal, 8))
  } else {
    switch (ch) {
      case 110: return "\n"; // 'n' -> '\n'
      case 114: return "\r"; // 'r' -> '\r'
      case 120: return String.fromCharCode(this.readHexChar(2)); // 'x'
      case 117: return codePointToString(this.readCodePoint()); // 'u'
      case 116: return "\t"; // 't' -> '\t'
      case 98: return "\b"; // 'b' -> '\b'
      case 118: return "\u000b"; // 'v' -> '\u000b'
      case 102: return "\f"; // 'f' -> '\f'
      case 48: return "\0"; // 0 -> '\0'
      case 13: if (this.input.charCodeAt(this.pos) === 10) ++this.pos; // '\r\n'
      // Falls through: an escaped line break adds nothing to the value.
      case 10: // ' \n'
        if (this.options.locations) { this.lineStart = this.pos; ++this.curLine }
        return ""
      default: return String.fromCharCode(ch)
    }
  }
}

// Used to read character escape sequences ('\x', '\u', '\U').

pp.readHexChar = function(len) {
  let n = this.readInt(16, len)
  if (n === null) this.raise(this.start, "Bad character escape sequence")
  return n
}

// Used to signal to callers of `readWord1` whether the word
// contained any escape sequences. This is needed because words with
// escape sequences must not be interpreted as keywords.

var containsEsc

// Read an identifier, and return it as a string.
// Sets `containsEsc`
// to whether the word contained a '\u' escape.
//
// Only escaped characters are appended one at a time; unescaped runs
// are copied over as whole slices as a micro-optimization.

pp.readWord1 = function() {
  containsEsc = false
  const astral = this.options.ecmaVersion >= 6
  let word = ""
  let sliceFrom = this.pos
  let atFirstChar = true
  for (; this.pos < this.input.length; atFirstChar = false) {
    const ch = this.fullCharCodeAtPos()
    if (isIdentifierChar(ch, astral)) {
      // Astral code points take up two UTF-16 units.
      this.pos += ch <= 0xffff ? 1 : 2
      continue
    }
    // Anything but "\" ends the word.
    if (ch !== 92) break

    containsEsc = true
    word += this.input.slice(sliceFrom, this.pos)
    const escStart = this.pos
    if (this.input.charCodeAt(++this.pos) !== 117) { // "u"
      this.raise(this.pos, "Expecting Unicode escape sequence \\uXXXX")
    }
    ++this.pos
    const esc = this.readCodePoint()
    // The escaped character has to be valid at its position in the word.
    const isAllowed = atFirstChar ? isIdentifierStart : isIdentifierChar
    if (!isAllowed(esc, astral)) {
      this.raise(escStart, "Invalid Unicode escape")
    }
    word += codePointToString(esc)
    sliceFrom = this.pos
  }
  return word + this.input.slice(sliceFrom, this.pos)
}

// Read an identifier or keyword token. Before ES6, a word that
// contained an escape sequence is never treated as a keyword.

pp.readWord = function() {
  const word = this.readWord1()
  const keywordsAllowed = this.options.ecmaVersion >= 6 || !containsEsc
  const type = keywordsAllowed && this.isKeyword(word) ? keywordTypes[word] : tt.name
  return this.finishToken(type, word)
}