Path: blob/main/src/vs/editor/test/common/modes/textToHtmlTokenizer.test.ts
5253 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import assert from 'assert';
import { Disposable, DisposableStore } from '../../../../base/common/lifecycle.js';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js';
import { ColorId, FontStyle, MetadataConsts } from '../../../common/encodedTokenAttributes.js';
import { EncodedTokenizationResult, IState, TokenizationRegistry } from '../../../common/languages.js';
import { ILanguageService } from '../../../common/languages/language.js';
import { _tokenizeToString, tokenizeLineToHTML } from '../../../common/languages/textToHtmlTokenizer.js';
import { LanguageIdCodec } from '../../../common/services/languagesRegistry.js';
import { TestLineToken, TestLineTokens } from '../core/testLineToken.js';
import { createModelServices } from '../testTextModel.js';
import { TestInstantiationService } from '../../../../platform/instantiation/test/common/instantiationServiceMock.js';

/**
 * Tests for `_tokenizeToString` (whole text -> `mtkN`-classed spans) and
 * `tokenizeLineToHTML` (one tokenized line -> inline-styled spans).
 *
 * NOTE(review): expected whitespace below is written with the literal entity
 * `&#160;` alternating with plain spaces. These entities were restored from a
 * rendering that had decoded them; confirm against the tokenizer's output.
 */
suite('Editor Modes - textToHtmlTokenizer', () => {

	let disposables: DisposableStore;
	let instantiationService: TestInstantiationService;

	setup(() => {
		disposables = new DisposableStore();
		instantiationService = createModelServices(disposables);
	});

	teardown(() => {
		disposables.dispose();
	});

	ensureNoDisposablesAreLeakedInTestSuite();

	/** Renders the expected pieces as a run of `<span class="...">text</span>` elements. */
	function toStr(pieces: { className: string; text: string }[]): string {
		const resultArr = pieces.map((t) => `<span class="${t.className}">${t.text}</span>`);
		return resultArr.join('');
	}

	test('TextToHtmlTokenizer 1', () => {
		const mode = disposables.add(instantiationService.createInstance(Mode));
		const support = TokenizationRegistry.get(mode.languageId)!;

		const actual = _tokenizeToString('.abc..def...gh', new LanguageIdCodec(), support);
		// `Mode` colors '.' with color id 7 and everything else with color id 9.
		const expected = [
			{ className: 'mtk7', text: '.' },
			{ className: 'mtk9', text: 'abc' },
			{ className: 'mtk7', text: '..' },
			{ className: 'mtk9', text: 'def' },
			{ className: 'mtk7', text: '...' },
			{ className: 'mtk9', text: 'gh' },
		];
		const expectedStr = `<div class="monaco-tokenized-source">${toStr(expected)}</div>`;

		assert.strictEqual(actual, expectedStr);
	});

	test('TextToHtmlTokenizer 2', () => {
		const mode = disposables.add(instantiationService.createInstance(Mode));
		const support = TokenizationRegistry.get(mode.languageId)!;

		// Two identical lines: the expected output joins the per-line spans with `<br/>`.
		const actual = _tokenizeToString('.abc..def...gh\n.abc..def...gh', new LanguageIdCodec(), support);
		const expected1 = [
			{ className: 'mtk7', text: '.' },
			{ className: 'mtk9', text: 'abc' },
			{ className: 'mtk7', text: '..' },
			{ className: 'mtk9', text: 'def' },
			{ className: 'mtk7', text: '...' },
			{ className: 'mtk9', text: 'gh' },
		];
		const expected2 = [
			{ className: 'mtk7', text: '.' },
			{ className: 'mtk9', text: 'abc' },
			{ className: 'mtk7', text: '..' },
			{ className: 'mtk9', text: 'def' },
			{ className: 'mtk7', text: '...' },
			{ className: 'mtk9', text: 'gh' },
		];
		const expectedStr1 = toStr(expected1);
		const expectedStr2 = toStr(expected2);
		const expectedStr = `<div class="monaco-tokenized-source">${expectedStr1}<br/>${expectedStr2}</div>`;

		assert.strictEqual(actual, expectedStr);
	});

	test('tokenizeLineToHTML', () => {
		const text = 'Ciao hello world!';
		// Token end offsets: 'Ciao' -> 4, ' ' -> 5, 'hello' -> 10, ' ' -> 11, 'world!' -> 17.
		const lineTokens = new TestLineTokens([
			new TestLineToken(
				4,
				(
					(3 << MetadataConsts.FOREGROUND_OFFSET)
					| ((FontStyle.Bold | FontStyle.Italic) << MetadataConsts.FONT_STYLE_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				5,
				(
					(1 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				10,
				(
					(4 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				11,
				(
					(1 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				17,
				(
					(5 << MetadataConsts.FOREGROUND_OFFSET)
					| ((FontStyle.Underline) << MetadataConsts.FONT_STYLE_OFFSET)
				) >>> 0
			)
		]);
		const colorMap = [null!, '#000000', '#ffffff', '#ff0000', '#00ff00', '#0000ff'];

		// Whole line.
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 0, 17, 4, true),
			[
				'<div>',
				'<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #0000ff;text-decoration: underline;">world!</span>',
				'</div>'
			].join('')
		);

		// End offset falls inside the last token.
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 0, 12, 4, true),
			[
				'<div>',
				'<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #0000ff;text-decoration: underline;">w</span>',
				'</div>'
			].join('')
		);

		// End offset on a token boundary: the last token is dropped entirely.
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 0, 11, 4, true),
			[
				'<div>',
				'<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'</div>'
			].join('')
		);

		// Start offset falls inside the first token.
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 1, 11, 4, true),
			[
				'<div>',
				'<span style="color: #ff0000;font-style: italic;font-weight: bold;">iao</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'</div>'
			].join('')
		);

		// Start offset on a space: a leading space is expected as `&#160;`.
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 4, 11, 4, true),
			[
				'<div>',
				'<span style="color: #000000;">&#160;</span>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'</div>'
			].join('')
		);

		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 5, 11, 4, true),
			[
				'<div>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'</div>'
			].join('')
		);

		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 5, 10, 4, true),
			[
				'<div>',
				'<span style="color: #00ff00;">hello</span>',
				'</div>'
			].join('')
		);

		// Both offsets fall inside the same token.
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 6, 9, 4, true),
			[
				'<div>',
				'<span style="color: #00ff00;">ell</span>',
				'</div>'
			].join('')
		);
	});

	test('tokenizeLineToHTML handle spaces #35954', () => {
		// 21 characters: 2 spaces, 'Ciao', 3 spaces, 'hello', 1 space, 'world!'.
		// The whitespace runs must match the token end offsets below (2, 6, 9, 14, 15, 21).
		const text = '  Ciao   hello world!';
		const lineTokens = new TestLineTokens([
			new TestLineToken(
				2,
				(
					(1 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				6,
				(
					(3 << MetadataConsts.FOREGROUND_OFFSET)
					| ((FontStyle.Bold | FontStyle.Italic) << MetadataConsts.FONT_STYLE_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				9,
				(
					(1 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				14,
				(
					(4 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				15,
				(
					(1 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				21,
				(
					(5 << MetadataConsts.FOREGROUND_OFFSET)
					| ((FontStyle.Underline) << MetadataConsts.FONT_STYLE_OFFSET)
				) >>> 0
			)
		]);
		const colorMap = [null!, '#000000', '#ffffff', '#ff0000', '#00ff00', '#0000ff'];

		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 0, 21, 4, true),
			[
				'<div>',
				'<span style="color: #000000;">&#160; </span>',
				'<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>',
				'<span style="color: #000000;"> &#160; </span>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #0000ff;text-decoration: underline;">world!</span>',
				'</div>'
			].join('')
		);

		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 0, 17, 4, true),
			[
				'<div>',
				'<span style="color: #000000;">&#160; </span>',
				'<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>',
				'<span style="color: #000000;"> &#160; </span>',
				'<span style="color: #00ff00;">hello</span>',
				'<span style="color: #000000;"> </span>',
				'<span style="color: #0000ff;text-decoration: underline;">wo</span>',
				'</div>'
			].join('')
		);

		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 0, 3, 4, true),
			[
				'<div>',
				'<span style="color: #000000;">&#160; </span>',
				'<span style="color: #ff0000;font-style: italic;font-weight: bold;">C</span>',
				'</div>'
			].join('')
		);
	});

	test('tokenizeLineToHTML with tabs and non-zero startOffset #263387', () => {
		// This test demonstrates the issue where tab padding is calculated incorrectly
		// when startOffset is non-zero and there are tabs AFTER the start position.
		// The bug: tabsCharDelta doesn't account for characters before startOffset.

		const colorMap = [null!, '#000000', '#ffffff', '#ff0000', '#00ff00'];

		// Critical test case: "\ta\tb" starting at position 2 (skipping first tab and 'a')
		// Layout: First tab (pos 0) goes to column 4, 'a' (pos 1) at column 4,
		//         second tab (pos 2) should go from column 5 to column 8 (3 spaces)
		// With the bug: charIndex starts at 2, tabsCharDelta=0 (first tab was never seen)
		//   When processing second tab: insertSpacesCount = 4 - (2 + 0) % 4 = 2 spaces (WRONG!)
		//   The old code thinks it's at column 2, but it's actually at column 5
		const text = '\ta\tb';
		const lineTokens = new TestLineTokens([
			new TestLineToken(
				1,
				(
					(1 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				2,
				(
					(3 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				3,
				(
					(1 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			),
			new TestLineToken(
				4,
				(
					(4 << MetadataConsts.FOREGROUND_OFFSET)
				) >>> 0
			)
		]);

		// First, verify the full line works correctly
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 0, 4, 4, true),
			[
				'<div>',
				'<span style="color: #000000;">&#160; &#160; </span>', // First tab: 4 spaces
				'<span style="color: #ff0000;">a</span>', // 'a' at column 4
				'<span style="color: #000000;"> &#160; </span>', // Second tab: 3 spaces (column 5 to 8)
				'<span style="color: #00ff00;">b</span>',
				'</div>'
			].join('')
		);

		// THE BUG: Starting at position 2 (after first tab and 'a')
		// Expected (with fix): 3 spaces for the second tab (column 5 to 8)
		// Buggy behavior (old code): 2 spaces (thinks it's at column 2, gives "&#160; ")
		// The fix correctly accounts for the skipped tab and 'a', outputting "&#160; &#160;"
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, 2, 4, 4, true),
			[
				'<div>',
				'<span style="color: #000000;">&#160; &#160;</span>', // With fix: 3 spaces; with bug: only 2 spaces
				'<span style="color: #00ff00;">b</span>',
				'</div>'
			].join('')
		);
	});

});

/**
 * Minimal test language: registers the language id `textToHtmlTokenizerMode`
 * and an encoded tokenizer that colors every '.' with color id 7 and every
 * other character with color id 9. Both registrations are disposed with the
 * instance.
 */
class Mode extends Disposable {

	public readonly languageId = 'textToHtmlTokenizerMode';

	constructor(
		@ILanguageService languageService: ILanguageService
	) {
		super();
		this._register(languageService.registerLanguage({ id: this.languageId }));
		this._register(TokenizationRegistry.register(this.languageId, {
			getInitialState: (): IState => null!,
			// Only the encoded tokenizer is implemented by this test language.
			tokenize: undefined!,
			tokenizeEncoded: (line: string, hasEOL: boolean, state: IState): EncodedTokenizationResult => {
				// Flat (startOffset, metadata) pairs; a new pair is pushed whenever the color changes.
				const tokensArr: number[] = [];
				let prevColor = -1 as ColorId;
				for (let i = 0; i < line.length; i++) {
					const colorId = (line.charAt(i) === '.' ? 7 : 9) as ColorId;
					if (prevColor !== colorId) {
						tokensArr.push(i);
						tokensArr.push((
							colorId << MetadataConsts.FOREGROUND_OFFSET
						) >>> 0);
					}
					prevColor = colorId;
				}

				const tokens = Uint32Array.from(tokensArr);
				return new EncodedTokenizationResult(tokens, [], null!);
			}
		}));
	}
}