Path: blob/main/extensions/copilot/test/simulation/fixtures/codeMapper/scanner.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
'use strict';

import { JSONScanner, ScanError, SyntaxKind } from './scannerTypes';

/**
 * Creates a JSON scanner on the given text.
 * If ignoreTrivia is set, whitespaces or comments are ignored.
 *
 * The scanner also recognises `//` line comments and block comments, reporting
 * them as trivia tokens. Problems found while scanning never throw; they are
 * recorded and exposed through `getTokenError()` for the most recent token.
 *
 * @param text The text to tokenize.
 * @param ignoreTrivia When true, `scan()` skips trivia tokens and only returns
 * the remaining token kinds.
 * @returns A scanner object whose state is advanced by `scan()`.
 */
export function createScanner(text: string, ignoreTrivia: boolean = false): JSONScanner {

	const len = text.length;
	let pos = 0,
		value: string = '',
		tokenOffset = 0,
		token: SyntaxKind = SyntaxKind.Unknown,
		lineNumber = 0,
		// Despite its name this holds the line NUMBER on which the current token
		// starts; it is what `getTokenStartLine()` returns.
		lineStartOffset = 0,
		tokenLineStartOffset = 0,
		prevTokenLineStartOffset = 0,
		scanError: ScanError = ScanError.None;

	/**
	 * Reads hexadecimal digits starting at `pos` and advances `pos` past them.
	 *
	 * @param count Number of digits wanted.
	 * @param exact When true, reading stops after `count` digits; otherwise it
	 * continues for as long as hex digits follow.
	 * @returns The numeric value of the digits, or -1 if fewer than `count`
	 * digits were available.
	 */
	function scanHexDigits(count: number, exact?: boolean): number {
		let digits = 0;
		let hexValue = 0;
		while (digits < count || !exact) {
			// Past the end of the text charCodeAt yields NaN, which matches none
			// of the ranges below and therefore ends the loop.
			const ch = text.charCodeAt(pos);
			if (ch >= CharacterCodes._0 && ch <= CharacterCodes._9) {
				hexValue = hexValue * 16 + ch - CharacterCodes._0;
			} else if (ch >= CharacterCodes.A && ch <= CharacterCodes.F) {
				hexValue = hexValue * 16 + ch - CharacterCodes.A + 10;
			} else if (ch >= CharacterCodes.a && ch <= CharacterCodes.f) {
				hexValue = hexValue * 16 + ch - CharacterCodes.a + 10;
			} else {
				break;
			}
			pos++;
			digits++;
		}
		if (digits < count) {
			hexValue = -1;
		}
		return hexValue;
	}

	/**
	 * Moves the scanner to `newPosition` and clears the current token state
	 * (value, offset, kind and error). Line tracking is left untouched.
	 */
	function setPosition(newPosition: number) {
		pos = newPosition;
		value = '';
		tokenOffset = 0;
		token = SyntaxKind.Unknown;
		scanError = ScanError.None;
	}

	/**
	 * Scans the unsigned part of a number whose first digit is at `pos`
	 * (a leading minus sign has already been consumed by the caller).
	 *
	 * @returns The text of the number. On a malformed fraction the text read so
	 * far is returned; on a malformed exponent the exponent part is left out of
	 * the returned text. Both cases set `ScanError.UnexpectedEndOfNumber`.
	 */
	function scanNumber(): string {
		const start = pos;
		if (text.charCodeAt(pos) === CharacterCodes._0) {
			// A leading zero stands alone; further digits are not consumed here.
			pos++;
		} else {
			pos++;
			while (pos < text.length && isDigit(text.charCodeAt(pos))) {
				pos++;
			}
		}
		if (pos < text.length && text.charCodeAt(pos) === CharacterCodes.dot) {
			pos++;
			if (pos < text.length && isDigit(text.charCodeAt(pos))) {
				pos++;
				while (pos < text.length && isDigit(text.charCodeAt(pos))) {
					pos++;
				}
			} else {
				scanError = ScanError.UnexpectedEndOfNumber;
				return text.substring(start, pos);
			}
		}
		let end = pos;
		if (pos < text.length && (text.charCodeAt(pos) === CharacterCodes.E || text.charCodeAt(pos) === CharacterCodes.e)) {
			pos++;
			// Parenthesised so the bounds check guards both sign comparisons.
			if (pos < text.length && (text.charCodeAt(pos) === CharacterCodes.plus || text.charCodeAt(pos) === CharacterCodes.minus)) {
				pos++;
			}
			if (pos < text.length && isDigit(text.charCodeAt(pos))) {
				pos++;
				while (pos < text.length && isDigit(text.charCodeAt(pos))) {
					pos++;
				}
				end = pos;
			} else {
				scanError = ScanError.UnexpectedEndOfNumber;
			}
		}
		return text.substring(start, end);
	}

	/**
	 * Scans the body of a string literal; `pos` must be just after the opening
	 * double quote. Escape sequences are decoded into the result.
	 *
	 * @returns The decoded string content read up to the closing quote, a line
	 * break, or the end of the text (the latter two set
	 * `ScanError.UnexpectedEndOfString`).
	 */
	function scanString(): string {

		let result = '',
			start = pos;

		while (true) {
			if (pos >= len) {
				result += text.substring(start, pos);
				scanError = ScanError.UnexpectedEndOfString;
				break;
			}
			const ch = text.charCodeAt(pos);
			if (ch === CharacterCodes.doubleQuote) {
				result += text.substring(start, pos);
				pos++;
				break;
			}
			if (ch === CharacterCodes.backslash) {
				// Flush the literal run collected so far, then decode the escape.
				result += text.substring(start, pos);
				pos++;
				if (pos >= len) {
					scanError = ScanError.UnexpectedEndOfString;
					break;
				}
				const ch2 = text.charCodeAt(pos++);
				switch (ch2) {
					case CharacterCodes.doubleQuote:
						result += '\"';
						break;
					case CharacterCodes.backslash:
						result += '\\';
						break;
					case CharacterCodes.slash:
						result += '/';
						break;
					case CharacterCodes.b:
						result += '\b';
						break;
					case CharacterCodes.f:
						result += '\f';
						break;
					case CharacterCodes.n:
						result += '\n';
						break;
					case CharacterCodes.r:
						result += '\r';
						break;
					case CharacterCodes.t:
						result += '\t';
						break;
					case CharacterCodes.u: {
						const ch3 = scanHexDigits(4, true);
						if (ch3 >= 0) {
							result += String.fromCharCode(ch3);
						} else {
							scanError = ScanError.InvalidUnicode;
						}
						break;
					}
					default:
						// Unknown escape: flag it and drop the escaped character.
						scanError = ScanError.InvalidEscapeCharacter;
				}
				start = pos;
				continue;
			}
			if (ch >= 0 && ch <= 0x1f) {
				if (isLineBreak(ch)) {
					result += text.substring(start, pos);
					scanError = ScanError.UnexpectedEndOfString;
					break;
				} else {
					scanError = ScanError.InvalidCharacter;
					// mark as error but continue with string
				}
			}
			pos++;
		}
		return result;
	}

	/**
	 * Scans the next token (trivia included), updating the token value, offset,
	 * kind, error and line bookkeeping.
	 *
	 * @returns The kind of the token that was read.
	 */
	function scanNext(): SyntaxKind {

		value = '';
		scanError = ScanError.None;

		tokenOffset = pos;
		lineStartOffset = lineNumber;
		prevTokenLineStartOffset = tokenLineStartOffset;

		if (pos >= len) {
			// at the end
			tokenOffset = len;
			return token = SyntaxKind.EOF;
		}

		let code = text.charCodeAt(pos);
		// trivia: whitespace
		if (isWhiteSpace(code)) {
			do {
				pos++;
				value += String.fromCharCode(code);
				code = text.charCodeAt(pos);
			} while (isWhiteSpace(code));

			return token = SyntaxKind.Trivia;
		}

		// trivia: newlines
		if (isLineBreak(code)) {
			pos++;
			value += String.fromCharCode(code);
			if (code === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
				// Treat CR LF as a single line break.
				pos++;
				value += '\n';
			}
			lineNumber++;
			tokenLineStartOffset = pos;
			return token = SyntaxKind.LineBreakTrivia;
		}

		switch (code) {
			// tokens: []{}:,
			case CharacterCodes.openBrace:
				pos++;
				return token = SyntaxKind.OpenBraceToken;
			case CharacterCodes.closeBrace:
				pos++;
				return token = SyntaxKind.CloseBraceToken;
			case CharacterCodes.openBracket:
				pos++;
				return token = SyntaxKind.OpenBracketToken;
			case CharacterCodes.closeBracket:
				pos++;
				return token = SyntaxKind.CloseBracketToken;
			case CharacterCodes.colon:
				pos++;
				return token = SyntaxKind.ColonToken;
			case CharacterCodes.comma:
				pos++;
				return token = SyntaxKind.CommaToken;

			// strings
			case CharacterCodes.doubleQuote:
				pos++;
				value = scanString();
				return token = SyntaxKind.StringLiteral;

			// comments
			case CharacterCodes.slash: {
				// `pos` still points at the leading slash here, so the token text
				// starts at `pos` itself. Starting one character earlier would
				// pull the preceding character into the comment's value.
				const commentStart = pos;
				const following = text.charCodeAt(pos + 1);

				// Single-line comment
				if (following === CharacterCodes.slash) {
					pos += 2;

					while (pos < len) {
						if (isLineBreak(text.charCodeAt(pos))) {
							break;
						}
						pos++;
					}
					value = text.substring(commentStart, pos);
					return token = SyntaxKind.LineCommentTrivia;
				}

				// Multi-line comment
				if (following === CharacterCodes.asterisk) {
					pos += 2;

					const safeLength = len - 1; // For lookahead.
					let commentClosed = false;
					while (pos < safeLength) {
						const ch = text.charCodeAt(pos);

						if (ch === CharacterCodes.asterisk && text.charCodeAt(pos + 1) === CharacterCodes.slash) {
							pos += 2;
							commentClosed = true;
							break;
						}

						pos++;

						if (isLineBreak(ch)) {
							if (ch === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
								pos++;
							}

							lineNumber++;
							tokenLineStartOffset = pos;
						}
					}

					if (!commentClosed) {
						// An unterminated comment swallows the rest of the text.
						// Clamp to `len` so the position never points past the end
						// (the loop can already leave `pos` at `len`).
						pos = len;
						scanError = ScanError.UnexpectedEndOfComment;
					}

					value = text.substring(commentStart, pos);
					return token = SyntaxKind.BlockCommentTrivia;
				}

				// just a single slash
				value += String.fromCharCode(code);
				pos++;
				return token = SyntaxKind.Unknown;
			}

			// numbers
			case CharacterCodes.minus:
				value += String.fromCharCode(code);
				pos++;
				if (pos === len || !isDigit(text.charCodeAt(pos))) {
					return token = SyntaxKind.Unknown;
				}
				// found a minus, followed by a number so
				// we proceed with scanning the digits
				value += scanNumber();
				return token = SyntaxKind.NumericLiteral;
			case CharacterCodes._0:
			case CharacterCodes._1:
			case CharacterCodes._2:
			case CharacterCodes._3:
			case CharacterCodes._4:
			case CharacterCodes._5:
			case CharacterCodes._6:
			case CharacterCodes._7:
			case CharacterCodes._8:
			case CharacterCodes._9:
				value += scanNumber();
				return token = SyntaxKind.NumericLiteral;

			// literals and unknown symbols
			default:
				// is a literal? Read the full word.
				while (pos < len && isUnknownContentCharacter(code)) {
					pos++;
					code = text.charCodeAt(pos);
				}
				if (tokenOffset !== pos) {
					value = text.substring(tokenOffset, pos);
					// keywords: true, false, null
					switch (value) {
						case 'true': return token = SyntaxKind.TrueKeyword;
						case 'false': return token = SyntaxKind.FalseKeyword;
						case 'null': return token = SyntaxKind.NullKeyword;
					}
					return token = SyntaxKind.Unknown;
				}
				// Nothing was consumed: emit the single character as unknown.
				value += String.fromCharCode(code);
				pos++;
				return token = SyntaxKind.Unknown;
		}
	}

	/**
	 * Tells whether `code` can be part of a bare word (keyword or unknown text),
	 * i.e. it is neither whitespace, a line break, nor one of the characters
	 * `}` `]` `{` `[` `"` `:` `,` `/`.
	 */
	function isUnknownContentCharacter(code: number) {
		if (isWhiteSpace(code) || isLineBreak(code)) {
			return false;
		}
		switch (code) {
			case CharacterCodes.closeBrace:
			case CharacterCodes.closeBracket:
			case CharacterCodes.openBrace:
			case CharacterCodes.openBracket:
			case CharacterCodes.doubleQuote:
			case CharacterCodes.colon:
			case CharacterCodes.comma:
			case CharacterCodes.slash:
				return false;
		}
		return true;
	}


	/**
	 * Scans tokens until one falls outside the numeric range
	 * [SyntaxKind.LineCommentTrivia, SyntaxKind.Trivia].
	 * NOTE(review): this relies on the trivia kinds being declared contiguously
	 * in `SyntaxKind`; confirm against './scannerTypes'.
	 */
	function scanNextNonTrivia(): SyntaxKind {
		let result: SyntaxKind;
		do {
			result = scanNext();
		} while (result >= SyntaxKind.LineCommentTrivia && result <= SyntaxKind.Trivia);
		return result;
	}

	return {
		setPosition: setPosition,
		getPosition: () => pos,
		scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
		getToken: () => token,
		getTokenValue: () => value,
		getTokenOffset: () => tokenOffset,
		getTokenLength: () => pos - tokenOffset,
		getTokenStartLine: () => lineStartOffset,
		getTokenStartCharacter: () => tokenOffset - prevTokenLineStartOffset,
		getTokenError: () => scanError,
	};
}

/** Returns true for a space or a tab character code. */
function isWhiteSpace(ch: number): boolean {
	return ch === CharacterCodes.space || ch === CharacterCodes.tab;
}

/** Returns true for a line feed or a carriage return character code. */
function isLineBreak(ch: number): boolean {
	return ch === CharacterCodes.lineFeed || ch === CharacterCodes.carriageReturn;
}

/** Returns true for the character codes of the decimal digits `0` to `9`. */
function isDigit(ch: number): boolean {
	return ch >= CharacterCodes._0 && ch <= CharacterCodes._9;
}

/** UTF-16 code units of the characters the scanner compares against. */
const enum CharacterCodes {
	lineFeed = 0x0A,              // \n
	carriageReturn = 0x0D,        // \r

	space = 0x0020,   // " "

	_0 = 0x30,
	_1 = 0x31,
	_2 = 0x32,
	_3 = 0x33,
	_4 = 0x34,
	_5 = 0x35,
	_6 = 0x36,
	_7 = 0x37,
	_8 = 0x38,
	_9 = 0x39,

	a = 0x61,
	b = 0x62,
	c = 0x63,
	d = 0x64,
	e = 0x65,
	f = 0x66,
	g = 0x67,
	h = 0x68,
	i = 0x69,
	j = 0x6A,
	k = 0x6B,
	l = 0x6C,
	m = 0x6D,
	n = 0x6E,
	o = 0x6F,
	p = 0x70,
	q = 0x71,
	r = 0x72,
	s = 0x73,
	t = 0x74,
	u = 0x75,
	v = 0x76,
	w = 0x77,
	x = 0x78,
	y = 0x79,
	z = 0x7A,

	A = 0x41,
	B = 0x42,
	C = 0x43,
	D = 0x44,
	E = 0x45,
	F = 0x46,
	G = 0x47,
	H = 0x48,
	I = 0x49,
	J = 0x4A,
	K = 0x4B,
	L = 0x4C,
	M = 0x4D,
	N = 0x4E,
	O = 0x4F,
	P = 0x50,
	Q = 0x51,
	R = 0x52,
	S = 0x53,
	T = 0x54,
	U = 0x55,
	V = 0x56,
	W = 0x57,
	X = 0x58,
	Y = 0x59,
	Z = 0x5a,

	asterisk = 0x2A,              // *
	backslash = 0x5C,             // \
	closeBrace = 0x7D,            // }
	closeBracket = 0x5D,          // ]
	colon = 0x3A,                 // :
	comma = 0x2C,                 // ,
	dot = 0x2E,                   // .
	doubleQuote = 0x22,           // "
	minus = 0x2D,                 // -
	openBrace = 0x7B,             // {
	openBracket = 0x5B,           // [
	plus = 0x2B,                  // +
	slash = 0x2F,                 // /

	formFeed = 0x0C,              // \f
	tab = 0x09,                   // \t
}