Path: blob/main/src/vs/editor/standalone/test/browser/monarch.test.ts
3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import assert from 'assert';
import { DisposableStore } from '../../../../base/common/lifecycle.js';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js';
import { Token, TokenizationRegistry } from '../../../common/languages.js';
import { ILanguageService } from '../../../common/languages/language.js';
import { LanguageService } from '../../../common/services/languageService.js';
import { StandaloneConfigurationService } from '../../browser/standaloneServices.js';
import { compile } from '../../common/monarch/monarchCompile.js';
import { MonarchTokenizer } from '../../common/monarch/monarchLexer.js';
import { IMonarchLanguage } from '../../common/monarch/monarchTypes.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { NullLogService } from '../../../../platform/log/common/log.js';

suite('Monarch', () => {

	ensureNoDisposablesAreLeakedInTestSuite();

	/**
	 * Compiles `language` under `languageId` and wraps the compiled grammar in a
	 * `MonarchTokenizer`.
	 *
	 * The second constructor argument is deliberately passed as `null!`; none of
	 * the tests in this suite supply a real value for it.
	 *
	 * @param languageService Language service handed to the tokenizer.
	 * @param languageId Id used both for compilation and for the tokenizer.
	 * @param language The Monarch grammar definition to compile.
	 * @param configurationService Configuration service handed to the tokenizer.
	 * @returns The newly constructed tokenizer (the caller is responsible for disposing it).
	 */
	function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage, configurationService: IConfigurationService): MonarchTokenizer {
		return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language), configurationService);
	}

	/**
	 * Tokenizes `lines` in order, starting from the tokenizer's initial state and
	 * feeding each line's end state into the tokenization of the next line.
	 *
	 * @param tokenizer The tokenizer under test.
	 * @param lines The input lines, one entry per line.
	 * @returns One token array per input line, in input order.
	 */
	function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
		const actualTokens: Token[][] = [];
		let state = tokenizer.getInitialState();
		for (const line of lines) {
			const result = tokenizer.tokenize(line, true, state);
			actualTokens.push(result.tokens);
			// Carry the end state forward so multi-line constructs are tracked.
			state = result.endState;
		}
		return actualTokens;
	}

	// A rule that combines `@rematch` with `nextEmbedded` must hand the matched
	// text over to the embedded 'sql' tokenizer, both inline and across lines.
	test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
		const disposables = new DisposableStore();
		const languageService = disposables.add(new LanguageService());
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		disposables.add(languageService.registerLanguage({ id: 'sql' }));
		// Minimal embedded language: every character becomes a 'token'.
		disposables.add(TokenizationRegistry.register('sql', disposables.add(createMonarchTokenizer(languageService, 'sql', {
			tokenizer: {
				root: [
					[/./, 'token']
				]
			}
		}, configurationService))));
		const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test1', {
			tokenizer: {
				root: [
					[`(\"\"\")${SQL_QUERY_START}`, [{ 'token': 'string.quote', }, { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },]],
					[/(""")$/, [{ token: 'string.quote', next: '@maybeStringIsSQL', },]],
				],
				maybeStringIsSQL: [
					[/(.*)/, {
						cases: {
							[`${SQL_QUERY_START}\\b.*`]: { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },
							'@default': { token: '@rematch', switchTo: '@endDblDocString', },
						}
					}],
				],
				endDblDocString: [
					['[^\']+', 'string'],
					['\\\\\'', 'string'],
					['\'\'\'', 'string', '@popall'],
					['\'', 'string']
				],
				endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]],
			}
		}, configurationService));

		const lines = [
			`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
			`mysql_query("""`,
			`SELECT *`,
			`FROM table_name`,
			`WHERE ds = '<DATEID>'`,
			`""")`,
		];

		const actualTokens = getTokens(tokenizer, lines);

		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1'),
				new Token(15, 'token.sql', 'sql'),
				new Token(61, 'string.quote.test1', 'test1'),
				new Token(64, 'source.test1', 'test1')
			],
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'string.quote.test1', 'test1'),
				new Token(3, 'source.test1', 'test1')
			]
		]);
		disposables.dispose();
	});

	// Same grammar and input as the previous test, except that the action which
	// leaves the embedded language (`nextEmbedded: '@pop'`) is nested inside
	// `cases` blocks. The expected tokens are identical.
	test('Test nextEmbedded: "@pop" in cases statement', () => {
		const disposables = new DisposableStore();
		const languageService = disposables.add(new LanguageService());
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		disposables.add(languageService.registerLanguage({ id: 'sql' }));
		// Minimal embedded language: every character becomes a 'token'.
		disposables.add(TokenizationRegistry.register('sql', disposables.add(createMonarchTokenizer(languageService, 'sql', {
			tokenizer: {
				root: [
					[/./, 'token']
				]
			}
		}, configurationService))));
		const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test1', {
			tokenizer: {
				root: [
					[`(\"\"\")${SQL_QUERY_START}`, [{ 'token': 'string.quote', }, { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },]],
					[/(""")$/, [{ token: 'string.quote', next: '@maybeStringIsSQL', },]],
				],
				maybeStringIsSQL: [
					[/(.*)/, {
						cases: {
							[`${SQL_QUERY_START}\\b.*`]: { token: '@rematch', next: '@endStringWithSQL', nextEmbedded: 'sql', },
							'@default': { token: '@rematch', switchTo: '@endDblDocString', },
						}
					}],
				],
				endDblDocString: [
					['[^\']+', 'string'],
					['\\\\\'', 'string'],
					['\'\'\'', 'string', '@popall'],
					['\'', 'string']
				],
				endStringWithSQL: [[/"""/, {
					cases: {
						'"""': {
							cases: {
								'': { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', }
							}
						},
						'@default': ''
					}
				}]],
			}
		}, configurationService));

		const lines = [
			`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
			`mysql_query("""`,
			`SELECT *`,
			`FROM table_name`,
			`WHERE ds = '<DATEID>'`,
			`""")`,
		];

		const actualTokens = getTokens(tokenizer, lines);

		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1'),
				new Token(15, 'token.sql', 'sql'),
				new Token(61, 'string.quote.test1', 'test1'),
				new Token(64, 'source.test1', 'test1')
			],
			[
				new Token(0, 'source.test1', 'test1'),
				new Token(12, 'string.quote.test1', 'test1')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'token.sql', 'sql')
			],
			[
				new Token(0, 'string.quote.test1', 'test1'),
				new Token(3, 'source.test1', 'test1')
			]
		]);
		disposables.dispose();
	});


	// Rules anchored with `$` must be able to fire on an empty line, so that a
	// line-continued comment followed by an empty line pops back to the root state.
	test('microsoft/monaco-editor#1235: Empty Line Handling', () => {
		const disposables = new DisposableStore();
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			tokenizer: {
				root: [
					{ include: '@comments' },
				],

				comments: [
					[/\/\/$/, 'comment'], // empty single-line comment
					[/\/\//, 'comment', '@comment_cpp'],
				],

				comment_cpp: [
					[/(?:[^\\]|(?:\\.))+$/, 'comment', '@pop'],
					[/.+$/, 'comment'],
					[/$/, 'comment', '@pop']
					// No possible rule to detect an empty line and @pop?
				],
			},
		}, configurationService));

		const lines = [
			`// This comment \\`,
			` continues on the following line`,
			``,
			`// This comment does NOT continue \\\\`,
			` because the escape char was itself escaped`,
			``,
			`// This comment DOES continue because \\\\\\`,
			` the 1st '\\' escapes the 2nd; the 3rd escapes EOL`,
			``,
			`// This comment continues to the following line \\`,
			``,
			`But the line was empty. This line should not be commented.`,
		];

		const actualTokens = getTokens(tokenizer, lines);

		assert.deepStrictEqual(actualTokens, [
			[new Token(0, 'comment.test', 'test')],
			[new Token(0, 'comment.test', 'test')],
			[],
			[new Token(0, 'comment.test', 'test')],
			[new Token(0, 'source.test', 'test')],
			[],
			[new Token(0, 'comment.test', 'test')],
			[new Token(0, 'comment.test', 'test')],
			[],
			[new Token(0, 'comment.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test')]
		]);

		disposables.dispose();
	});

	// With `includeLF: true` a rule can match `\n`, which lets the `inner` state
	// pop at the end of the line it was entered on.
	test('microsoft/monaco-editor#2265: Exit a state at end of line', () => {
		const disposables = new DisposableStore();
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());
		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			includeLF: true,
			tokenizer: {
				root: [
					[/^\*/, '', '@inner'],
					[/\:\*/, '', '@inner'],
					[/[^*:]+/, 'string'],
					[/[*:]/, 'string']
				],
				inner: [
					[/\n/, '', '@pop'],
					[/\d+/, 'number'],
					[/[^\d]+/, '']
				]
			}
		}, configurationService));

		const lines = [
			`PRINT 10 * 20`,
			`*FX200, 3`,
			`PRINT 2*3:*FX200, 3`
		];

		const actualTokens = getTokens(tokenizer, lines);

		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'string.test', 'test'),
			],
			[
				new Token(0, '', 'test'),
				new Token(3, 'number.test', 'test'),
				new Token(6, '', 'test'),
				new Token(8, 'number.test', 'test'),
			],
			[
				new Token(0, 'string.test', 'test'),
				new Token(9, '', 'test'),
				new Token(13, 'number.test', 'test'),
				new Token(16, '', 'test'),
				new Token(18, 'number.test', 'test'),
			]
		]);

		disposables.dispose();
	});

	// Both grammars below are expected to tokenize the literal input `@ham`:
	// tokenizer1 via a chain of `@key` attribute references that ends in '@ham',
	// tokenizer2 via the `@@` form in its regex.
	test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
		const disposables = new DisposableStore();
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer1 = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			uselessReplaceKey1: '@uselessReplaceKey2',
			uselessReplaceKey2: '@uselessReplaceKey3',
			uselessReplaceKey3: '@uselessReplaceKey4',
			uselessReplaceKey4: '@uselessReplaceKey5',
			uselessReplaceKey5: '@ham',
			tokenizer: {
				root: [
					{
						regex: /@\w+/.test('@ham')
							? new RegExp(`^${'@uselessReplaceKey1'}$`)
							: new RegExp(`^${'@ham'}$`),
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const tokenizer2 = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			tokenizer: {
				root: [
					{
						regex: /@@ham/,
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const lines = [
			`@ham`
		];

		const actualTokens1 = getTokens(tokenizer1, lines);
		assert.deepStrictEqual(actualTokens1, [
			[
				new Token(0, 'ham.test', 'test'),
			]
		]);

		const actualTokens2 = getTokens(tokenizer2, lines);
		assert.deepStrictEqual(actualTokens2, [
			[
				new Token(0, 'ham.test', 'test'),
			]
		]);

		disposables.dispose();
	});

	// The regex `/@@@@/` is expected to match the two-character input `@@`.
	test('microsoft/monaco-editor#2424: Allow to target @@', () => {
		const disposables = new DisposableStore();
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			tokenizer: {
				root: [
					{
						regex: /@@@@/,
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const lines = [
			`@@`
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'ham.test', 'test'),
			]
		]);

		disposables.dispose();
	});

	// Lines longer than `editor.maxTokenizationLineLength` must not be run through
	// the grammar; they come back as a single token with an empty type.
	test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', async () => {
		const disposables = new DisposableStore();

		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		// Set maxTokenizationLineLength to 4 so that "ham" works but "hamham" would fail
		await configurationService.updateValue('editor.maxTokenizationLineLength', 4);

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			tokenizer: {
				root: [
					{
						regex: /ham/,
						action: { token: 'ham' }
					},
				],
			},
		}, configurationService));

		const lines = [
			'ham', // length 3, should be tokenized
			'hamham' // length 6, should NOT be tokenized
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'ham.test', 'test'),
			], [
				new Token(0, '', 'test')
			]
		]);

		disposables.dispose();
	});

	// The raw-string delimiter captured as `$1` is carried into the next state
	// name (`@raw.$1`) and read back inside that state's rule via `$S2`.
	test('microsoft/monaco-editor#3128: allow state access within rules', () => {
		const disposables = new DisposableStore();
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			encoding: /u|u8|U|L/,
			tokenizer: {
				root: [
					// C++ 11 Raw String
					[/@encoding?R\"(?:([^ ()\\\t]*))\(/, { token: 'string.raw.begin', next: '@raw.$1' }],
				],

				raw: [
					[/.*\)$S2\"/, 'string.raw', '@pop'],
					[/.*/, 'string.raw']
				],
			},
		}, configurationService));

		const lines = [
			`int main(){`,
			``,
			` auto s = R""""(`,
			` Hello World`,
			` )"""";`,
			``,
			` std::cout << "hello";`,
			``,
			`}`,
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[new Token(0, 'source.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test'), new Token(10, 'string.raw.begin.test', 'test')],
			[new Token(0, 'string.raw.test', 'test')],
			[new Token(0, 'string.raw.test', 'test'), new Token(6, 'source.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test')],
			[],
			[new Token(0, 'source.test', 'test')],
		]);

		disposables.dispose();
	});

	// A raw-string delimiter containing a regex metacharacter (`[`) must not
	// break tokenization when it is substituted into the `raw` state's rule.
	test('microsoft/monaco-editor#4775: Raw-strings in c++ can break monarch', () => {
		const disposables = new DisposableStore();
		const configurationService = new StandaloneConfigurationService(new NullLogService());
		const languageService = disposables.add(new LanguageService());

		const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
			ignoreCase: false,
			encoding: /u|u8|U|L/,
			tokenizer: {
				root: [
					// C++ 11 Raw String
					[/@encoding?R\"(?:([^ ()\\\t]*))\(/, { token: 'string.raw.begin', next: '@raw.$1' }],
				],

				raw: [
					[/.*\)$S2\"/, 'string.raw', '@pop'],
					[/.*/, 'string.raw']
				],
			},
		}, configurationService));

		const lines = [
			`R"[())"`,
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[new Token(0, 'string.raw.begin.test', 'test'), new Token(4, 'string.raw.test', 'test')],
		]);

		disposables.dispose();
	});

});