Path: blob/main/src/vs/platform/contextkey/common/scanner.ts
3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { CharCode } from '../../../base/common/charCode.js';
import { illegalState } from '../../../base/common/errors.js';
import { localize } from '../../../nls.js';

/** Kinds of tokens that {@link Scanner} can emit. */
export const enum TokenType {
	LParen,
	RParen,
	Neg,
	Eq,
	NotEq,
	Lt,
	LtEq,
	Gt,
	GtEq,
	RegexOp,
	RegexStr,
	True,
	False,
	In,
	Not,
	And,
	Or,
	Str,
	QuotedStr,
	Error,
	EOF,
}

/**
 * A scanned token. `offset` is an index into the scanned input.
 * Only `RegexStr`, `Str`, `QuotedStr` and `Error` carry their source text (`lexeme`);
 * `Eq` / `NotEq` record whether the three-character form (`===` / `!==`) was used.
 */
export type Token =
	| { type: TokenType.LParen; offset: number }
	| { type: TokenType.RParen; offset: number }
	| { type: TokenType.Neg; offset: number }
	| { type: TokenType.Eq; offset: number; isTripleEq: boolean }
	| { type: TokenType.NotEq; offset: number; isTripleEq: boolean }
	| { type: TokenType.Lt; offset: number }
	| { type: TokenType.LtEq; offset: number }
	| { type: TokenType.Gt; offset: number }
	| { type: TokenType.GtEq; offset: number }
	| { type: TokenType.RegexOp; offset: number }
	| { type: TokenType.RegexStr; offset: number; lexeme: string }
	| { type: TokenType.True; offset: number }
	| { type: TokenType.False; offset: number }
	| { type: TokenType.In; offset: number }
	| { type: TokenType.Not; offset: number }
	| { type: TokenType.And; offset: number }
	| { type: TokenType.Or; offset: number }
	| { type: TokenType.Str; offset: number; lexeme: string }
	| { type: TokenType.QuotedStr; offset: number; lexeme: string }
	| { type: TokenType.Error; offset: number; lexeme: string }
	| { type: TokenType.EOF; offset: number };

/** Token types that are recognized by looking up a scanned word in the keyword table. */
type KeywordTokenType = TokenType.Not | TokenType.In | TokenType.False | TokenType.True;

/** Token types whose token object consists of just `type` and `offset`. */
type TokenTypeWithoutLexeme =
	TokenType.LParen |
	TokenType.RParen |
	TokenType.Neg |
	TokenType.Lt |
	TokenType.LtEq |
	TokenType.Gt |
	TokenType.GtEq |
	TokenType.RegexOp |
	TokenType.True |
	TokenType.False |
	TokenType.In |
	TokenType.Not |
	TokenType.And |
	TokenType.Or |
	TokenType.EOF;

/**
 * Example:
 * `foo == bar'` - note how single quote doesn't have a corresponding closing quote,
 * so it's reported as unexpected
 */
export type LexingError = {
	offset: number; /** note that this doesn't take into account escape characters from the original encoding of the string, e.g., within an extension manifest file's JSON encoding */
	lexeme: string;
	additionalInfo?: string;
};

/**
 * Builds a localized "Did you mean ...?" hint for one, two or three suggestions.
 *
 * @param meant the suggested spellings
 * @returns the localized hint, or `undefined` when called with zero or more than three suggestions
 */
function hintDidYouMean(...meant: string[]): string | undefined {
	switch (meant.length) {
		case 1:
			return localize('contextkey.scanner.hint.didYouMean1', "Did you mean {0}?", meant[0]);
		case 2:
			return localize('contextkey.scanner.hint.didYouMean2', "Did you mean {0} or {1}?", meant[0], meant[1]);
		case 3:
			return localize('contextkey.scanner.hint.didYouMean3', "Did you mean {0}, {1} or {2}?", meant[0], meant[1], meant[2]);
		default: // we just don't expect that many
			return undefined;
	}
}

const hintDidYouForgetToOpenOrCloseQuote = localize('contextkey.scanner.hint.didYouForgetToOpenOrCloseQuote', "Did you forget to open or close the quote?");
const hintDidYouForgetToEscapeSlash = localize('contextkey.scanner.hint.didYouForgetToEscapeSlash', "Did you forget to escape the '/' (slash) character? Put two backslashes before it to escape, e.g., '\\\\/\'.");

/**
 * A simple scanner for context keys.
 *
 * Example:
 *
 * ```ts
 * const scanner = new Scanner().reset('resourceFileName =~ /docker/ && !config.docker.enabled');
 * const tokens = scanner.scan();
 * if (scanner.errors.length > 0) {
 * 	scanner.errors.forEach(err => console.error(`Unexpected token at ${err.offset}: ${err.lexeme}\nHint: ${err.additionalInfo}`));
 * } else {
 * 	// process tokens
 * }
 * ```
 */
export class Scanner {

	/**
	 * Returns the textual form of a token: the fixed spelling for operators and keywords,
	 * the stored `lexeme` for tokens that carry one, and the literal `'EOF'` for the EOF token.
	 *
	 * @throws an illegal-state error if the token type has no case below
	 */
	static getLexeme(token: Token): string {
		switch (token.type) {
			case TokenType.LParen:
				return '(';
			case TokenType.RParen:
				return ')';
			case TokenType.Neg:
				return '!';
			case TokenType.Eq:
				return token.isTripleEq ? '===' : '==';
			case TokenType.NotEq:
				return token.isTripleEq ? '!==' : '!=';
			case TokenType.Lt:
				return '<';
			case TokenType.LtEq:
				return '<=';
			case TokenType.Gt:
				return '>'; // strict greater-than; '>=' belongs to GtEq only
			case TokenType.GtEq:
				return '>=';
			case TokenType.RegexOp:
				return '=~';
			case TokenType.RegexStr:
				return token.lexeme;
			case TokenType.True:
				return 'true';
			case TokenType.False:
				return 'false';
			case TokenType.In:
				return 'in';
			case TokenType.Not:
				return 'not';
			case TokenType.And:
				return '&&';
			case TokenType.Or:
				return '||';
			case TokenType.Str:
				return token.lexeme;
			case TokenType.QuotedStr:
				return token.lexeme;
			case TokenType.Error:
				return token.lexeme;
			case TokenType.EOF:
				return 'EOF';
			default:
				throw illegalState(`unhandled token type: ${JSON.stringify(token)}; have you forgotten to add a case?`);
		}
	}

	/** Char codes of the letters accepted as flags after a regex literal's closing slash. */
	private static _regexFlags = new Set(['i', 'g', 's', 'm', 'y', 'u'].map(ch => ch.charCodeAt(0)));

	/** Words that are emitted as dedicated keyword tokens instead of `Str`. */
	private static _keywords = new Map<string, KeywordTokenType>([
		['not', TokenType.Not],
		['in', TokenType.In],
		['false', TokenType.False],
		['true', TokenType.True],
	]);

	private _input: string = '';
	/** Offset at which the token currently being scanned starts. */
	private _start: number = 0;
	/** Offset of the next character to be consumed. */
	private _current: number = 0;
	private _tokens: Token[] = [];
	private _errors: LexingError[] = [];

	/** Lexing errors recorded since the last {@link reset}. */
	get errors(): Readonly<LexingError[]> {
		return this._errors;
	}

	/**
	 * Sets the input to scan and clears positions, tokens and errors.
	 *
	 * @returns this scanner, to allow chaining
	 */
	reset(value: string) {
		this._input = value;

		this._start = 0;
		this._current = 0;
		this._tokens = [];
		this._errors = [];

		return this;
	}

	/**
	 * Scans the remaining input into tokens and appends a final `EOF` token.
	 * Malformed input does not throw: it yields `Error` tokens and matching entries in {@link errors}.
	 *
	 * @returns a copy of the token list accumulated since the last {@link reset}
	 */
	scan() {
		while (!this._isAtEnd()) {

			this._start = this._current;

			const ch = this._advance();
			switch (ch) {
				case CharCode.OpenParen: this._addToken(TokenType.LParen); break;
				case CharCode.CloseParen: this._addToken(TokenType.RParen); break;

				case CharCode.ExclamationMark:
					if (this._match(CharCode.Equals)) {
						const isTripleEq = this._match(CharCode.Equals); // eat last `=` if `!==`
						this._tokens.push({ type: TokenType.NotEq, offset: this._start, isTripleEq });
					} else {
						this._addToken(TokenType.Neg);
					}
					break;

				case CharCode.SingleQuote: this._quotedString(); break;
				case CharCode.Slash: this._regex(); break;

				case CharCode.Equals:
					if (this._match(CharCode.Equals)) { // support `==`
						const isTripleEq = this._match(CharCode.Equals); // eat last `=` if `===`
						this._tokens.push({ type: TokenType.Eq, offset: this._start, isTripleEq });
					} else if (this._match(CharCode.Tilde)) {
						this._addToken(TokenType.RegexOp);
					} else {
						this._error(hintDidYouMean('==', '=~'));
					}
					break;

				case CharCode.LessThan: this._addToken(this._match(CharCode.Equals) ? TokenType.LtEq : TokenType.Lt); break;

				case CharCode.GreaterThan: this._addToken(this._match(CharCode.Equals) ? TokenType.GtEq : TokenType.Gt); break;

				case CharCode.Ampersand:
					if (this._match(CharCode.Ampersand)) {
						this._addToken(TokenType.And);
					} else {
						this._error(hintDidYouMean('&&'));
					}
					break;

				case CharCode.Pipe:
					if (this._match(CharCode.Pipe)) {
						this._addToken(TokenType.Or);
					} else {
						this._error(hintDidYouMean('||'));
					}
					break;

				// TODO@ulugbekna: 1) rewrite using a regex 2) reconsider what characters are considered whitespace, including unicode, nbsp, etc.
				case CharCode.Space:
				case CharCode.CarriageReturn:
				case CharCode.Tab:
				case CharCode.LineFeed:
				case CharCode.NoBreakSpace: // non-breaking space
					break;

				default:
					this._string();
			}
		}

		this._start = this._current;
		this._addToken(TokenType.EOF);

		return Array.from(this._tokens);
	}

	/** Consumes the next character only if it equals `expected`; reports whether it did. */
	private _match(expected: number): boolean {
		if (this._isAtEnd()) {
			return false;
		}
		if (this._input.charCodeAt(this._current) !== expected) {
			return false;
		}
		this._current++;
		return true;
	}

	/** Consumes and returns the char code at the current position. */
	private _advance(): number {
		return this._input.charCodeAt(this._current++);
	}

	/** Returns the char code at the current position without consuming it (`CharCode.Null` at end of input). */
	private _peek(): number {
		return this._isAtEnd() ? CharCode.Null : this._input.charCodeAt(this._current);
	}

	/** Appends a lexeme-less token that starts at `_start`. */
	private _addToken(type: TokenTypeWithoutLexeme) {
		this._tokens.push({ type, offset: this._start });
	}

	/**
	 * Records the text in `[_start, _current)` both as a `LexingError` and as an `Error` token.
	 *
	 * @param additional optional hint stored as the error's `additionalInfo`
	 */
	private _error(additional?: string) {
		const offset = this._start;
		const lexeme = this._input.substring(this._start, this._current);
		const errToken: Token = { type: TokenType.Error, offset: this._start, lexeme };
		this._errors.push({ offset, lexeme, additionalInfo: additional });
		this._tokens.push(errToken);
	}

	// u - unicode, y - sticky // TODO@ulugbekna: we accept double quotes as part of the string rather than as a delimiter (to preserve old parser's behavior)
	private stringRe = /[a-zA-Z0-9_<>\-\./\\:\*\?\+\[\]\^,#@;"%\$\p{L}-]+/uy;

	/**
	 * Scans an unquoted word starting at `_start` and emits a keyword token if the word is
	 * in the keyword table, otherwise a `Str` token.
	 * If the sticky regex does not match at `_start`, no token and no error is produced;
	 * the single character already consumed by `scan()` is simply skipped.
	 */
	private _string() {
		this.stringRe.lastIndex = this._start;
		const match = this.stringRe.exec(this._input);
		if (match) {
			this._current = this._start + match[0].length;
			const lexeme = this._input.substring(this._start, this._current);
			const keyword = Scanner._keywords.get(lexeme);
			if (keyword) {
				this._addToken(keyword);
			} else {
				this._tokens.push({ type: TokenType.Str, lexeme, offset: this._start });
			}
		}
	}

	// captures the lexeme without the leading and trailing '
	private _quotedString() {
		while (this._peek() !== CharCode.SingleQuote && !this._isAtEnd()) { // TODO@ulugbekna: add support for escaping ' ?
			this._advance();
		}

		if (this._isAtEnd()) {
			// no closing quote: everything from the opening quote to the end becomes an error
			this._error(hintDidYouForgetToOpenOrCloseQuote);
			return;
		}

		// consume the closing '
		this._advance();

		// both the lexeme and the reported offset exclude the opening quote
		this._tokens.push({ type: TokenType.QuotedStr, lexeme: this._input.substring(this._start + 1, this._current - 1), offset: this._start + 1 });
	}

	/*
	 * Lexing a regex expression: /.../[igsmyu]*
	 * Based on https://github.com/microsoft/TypeScript/blob/9247ef115e617805983740ba795d7a8164babf89/src/compiler/scanner.ts#L2129-L2181
	 *
	 * Note that we want slashes within a regex to be escaped, e.g., /file:\\/\\/\\// should match `file:///`
	 */
	private _regex() {
		let p = this._current;

		let inEscape = false;
		let inCharacterClass = false;
		while (true) {
			if (p >= this._input.length) {
				// ran off the end without finding the closing slash
				this._current = p;
				this._error(hintDidYouForgetToEscapeSlash);
				return;
			}

			const ch = this._input.charCodeAt(p);

			if (inEscape) { // parsing an escape character
				inEscape = false;
			} else if (ch === CharCode.Slash && !inCharacterClass) { // end of regex
				p++;
				break;
			} else if (ch === CharCode.OpenSquareBracket) {
				inCharacterClass = true;
			} else if (ch === CharCode.Backslash) {
				inEscape = true;
			} else if (ch === CharCode.CloseSquareBracket) {
				inCharacterClass = false;
			}
			p++;
		}

		// Consume flags // TODO@ulugbekna: use regex instead
		while (p < this._input.length && Scanner._regexFlags.has(this._input.charCodeAt(p))) {
			p++;
		}

		this._current = p;

		// the lexeme includes both slashes and any trailing flags
		const lexeme = this._input.substring(this._start, this._current);
		this._tokens.push({ type: TokenType.RegexStr, lexeme, offset: this._start });
	}

	/** Whether the whole input has been consumed. */
	private _isAtEnd() {
		return this._current >= this._input.length;
	}
}