Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/test/common/modes/textToHtmlTokenizer.test.ts
5253 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import assert from 'assert';
7
import { Disposable, DisposableStore } from '../../../../base/common/lifecycle.js';
8
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js';
9
import { ColorId, FontStyle, MetadataConsts } from '../../../common/encodedTokenAttributes.js';
10
import { EncodedTokenizationResult, IState, TokenizationRegistry } from '../../../common/languages.js';
11
import { ILanguageService } from '../../../common/languages/language.js';
12
import { _tokenizeToString, tokenizeLineToHTML } from '../../../common/languages/textToHtmlTokenizer.js';
13
import { LanguageIdCodec } from '../../../common/services/languagesRegistry.js';
14
import { TestLineToken, TestLineTokens } from '../core/testLineToken.js';
15
import { createModelServices } from '../testTextModel.js';
16
import { TestInstantiationService } from '../../../../platform/instantiation/test/common/instantiationServiceMock.js';
17
18
suite('Editor Modes - textToHtmlTokenizer', () => {
19
20
// Per-test fixture. Both bindings are (re)assigned in `setup` and released in `teardown`.
let instantiationService: TestInstantiationService;
let disposables: DisposableStore;

setup(() => {
	// A fresh store for every test; the model services are created against that same store.
	const store = new DisposableStore();
	disposables = store;
	instantiationService = createModelServices(store);
});

// Dispose everything that was added to the store during the test.
teardown(() => disposables.dispose());

ensureNoDisposablesAreLeakedInTestSuite();
33
34
/**
 * Builds the expected HTML for a sequence of tokens: one `<span class="…">…</span>`
 * per piece, concatenated in order with no separator.
 *
 * Neither `className` nor `text` is escaped; they are interpolated verbatim.
 *
 * @param pieces Class name and text of each expected span.
 * @returns The concatenated spans, or an empty string when `pieces` is empty.
 */
function toStr(pieces: { className: string; text: string }[]): string {
	return pieces.reduce(
		(html, piece) => html + `<span class="${piece.className}">${piece.text}</span>`,
		''
	);
}
38
39
test('TextToHtmlTokenizer 1', () => {
	// Creating the mode registers its tokenization support; adding it to the store
	// gets it disposed again in teardown.
	const { languageId } = disposables.add(instantiationService.createInstance(Mode));
	const tokenizationSupport = TokenizationRegistry.get(languageId)!;

	// The test mode assigns color 7 to '.' and color 9 to every other character;
	// the expected spans carry the matching `mtk7` / `mtk9` class names.
	const expectedSpans = toStr([
		{ className: 'mtk7', text: '.' },
		{ className: 'mtk9', text: 'abc' },
		{ className: 'mtk7', text: '..' },
		{ className: 'mtk9', text: 'def' },
		{ className: 'mtk7', text: '...' },
		{ className: 'mtk9', text: 'gh' },
	]);

	assert.strictEqual(
		_tokenizeToString('.abc..def...gh', new LanguageIdCodec(), tokenizationSupport),
		`<div class="monaco-tokenized-source">${expectedSpans}</div>`
	);
});
56
57
test('TextToHtmlTokenizer 2', () => {
	const { languageId } = disposables.add(instantiationService.createInstance(Mode));
	const tokenizationSupport = TokenizationRegistry.get(languageId)!;

	// The input is the same line twice, so both lines share one expectation.
	const line = '.abc..def...gh';
	const expectedLine = toStr([
		{ className: 'mtk7', text: '.' },
		{ className: 'mtk9', text: 'abc' },
		{ className: 'mtk7', text: '..' },
		{ className: 'mtk9', text: 'def' },
		{ className: 'mtk7', text: '...' },
		{ className: 'mtk9', text: 'gh' },
	]);

	// Lines are expected to be separated by a single `<br/>` inside the wrapper div.
	assert.strictEqual(
		_tokenizeToString(`${line}\n${line}`, new LanguageIdCodec(), tokenizationSupport),
		`<div class="monaco-tokenized-source">${expectedLine}<br/>${expectedLine}</div>`
	);
});
84
85
test('tokenizeLineToHTML', () => {
	/** Packs a foreground color id and optional font-style bits into unsigned token metadata. */
	const metadata = (foreground: number, fontStyle: number = 0): number => (
		(foreground << MetadataConsts.FOREGROUND_OFFSET)
		| (fontStyle << MetadataConsts.FONT_STYLE_OFFSET)
	) >>> 0;

	const text = 'Ciao hello world!';
	// Each token is given by its end offset in `text` plus its metadata:
	// "Ciao" | " " | "hello" | " " | "world!"
	const lineTokens = new TestLineTokens([
		new TestLineToken(4, metadata(3, FontStyle.Bold | FontStyle.Italic)),
		new TestLineToken(5, metadata(1)),
		new TestLineToken(10, metadata(4)),
		new TestLineToken(11, metadata(1)),
		new TestLineToken(17, metadata(5, FontStyle.Underline)),
	]);
	// Indexed by color id; slot 0 is never referenced by the tokens above.
	const colorMap = [null!, '#000000', '#ffffff', '#ff0000', '#00ff00', '#0000ff'];

	// Building blocks of the expected markup.
	const ciao = '<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>';
	const iao = '<span style="color: #ff0000;font-style: italic;font-weight: bold;">iao</span>';
	const space = '<span style="color: #000000;"> </span>';
	const nbsp = '<span style="color: #000000;">&#160;</span>';
	const hello = '<span style="color: #00ff00;">hello</span>';
	const ell = '<span style="color: #00ff00;">ell</span>';
	const world = '<span style="color: #0000ff;text-decoration: underline;">world!</span>';
	const w = '<span style="color: #0000ff;text-decoration: underline;">w</span>';

	// [startOffset, endOffset, expected spans]. The ranges progressively clip the
	// line at token boundaries and in the middle of tokens, from both ends.
	const cases: [number, number, string[]][] = [
		[0, 17, [ciao, space, hello, space, world]],
		[0, 12, [ciao, space, hello, space, w]],
		[0, 11, [ciao, space, hello, space]],
		[1, 11, [iao, space, hello, space]],
		// A range that begins on the space expects it as `&#160;`.
		[4, 11, [nbsp, hello, space]],
		[5, 11, [hello, space]],
		[5, 10, [hello]],
		[6, 9, [ell]],
	];

	for (const [startOffset, endOffset, spans] of cases) {
		assert.strictEqual(
			tokenizeLineToHTML(text, lineTokens, colorMap, startOffset, endOffset, 4, true),
			`<div>${spans.join('')}</div>`
		);
	}
});
212
test('tokenizeLineToHTML handle spaces #35954', () => {
	// The exact whitespace is significant: two leading spaces and three spaces
	// between "Ciao" and "hello", 21 characters in total. This must agree with the
	// token end offsets below (2, 6, 9, 14, 15, 21) and with the expected markup,
	// which has a two-character run before "Ciao" and a three-character run after it.
	const text = '  Ciao   hello world!';
	// Each token is given by its end offset in `text` plus its metadata:
	// "  " | "Ciao" | "   " | "hello" | " " | "world!"
	const lineTokens = new TestLineTokens([
		new TestLineToken(
			2,
			(
				(1 << MetadataConsts.FOREGROUND_OFFSET)
			) >>> 0
		),
		new TestLineToken(
			6,
			(
				(3 << MetadataConsts.FOREGROUND_OFFSET)
				| ((FontStyle.Bold | FontStyle.Italic) << MetadataConsts.FONT_STYLE_OFFSET)
			) >>> 0
		),
		new TestLineToken(
			9,
			(
				(1 << MetadataConsts.FOREGROUND_OFFSET)
			) >>> 0
		),
		new TestLineToken(
			14,
			(
				(4 << MetadataConsts.FOREGROUND_OFFSET)
			) >>> 0
		),
		new TestLineToken(
			15,
			(
				(1 << MetadataConsts.FOREGROUND_OFFSET)
			) >>> 0
		),
		new TestLineToken(
			21,
			(
				(5 << MetadataConsts.FOREGROUND_OFFSET)
				| ((FontStyle.Underline) << MetadataConsts.FONT_STYLE_OFFSET)
			) >>> 0
		)
	]);
	// Indexed by color id; slot 0 is never referenced by the tokens above.
	const colorMap = [null!, '#000000', '#ffffff', '#ff0000', '#00ff00', '#0000ff'];

	// Whole line: whitespace runs are expected as a mix of `&#160;` and plain spaces.
	assert.strictEqual(
		tokenizeLineToHTML(text, lineTokens, colorMap, 0, 21, 4, true),
		[
			'<div>',
			'<span style="color: #000000;">&#160; </span>',
			'<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>',
			'<span style="color: #000000;"> &#160; </span>',
			'<span style="color: #00ff00;">hello</span>',
			'<span style="color: #000000;"> </span>',
			'<span style="color: #0000ff;text-decoration: underline;">world!</span>',
			'</div>'
		].join('')
	);

	// End offset 17 cuts the last token after "wo".
	assert.strictEqual(
		tokenizeLineToHTML(text, lineTokens, colorMap, 0, 17, 4, true),
		[
			'<div>',
			'<span style="color: #000000;">&#160; </span>',
			'<span style="color: #ff0000;font-style: italic;font-weight: bold;">Ciao</span>',
			'<span style="color: #000000;"> &#160; </span>',
			'<span style="color: #00ff00;">hello</span>',
			'<span style="color: #000000;"> </span>',
			'<span style="color: #0000ff;text-decoration: underline;">wo</span>',
			'</div>'
		].join('')
	);

	// End offset 3 keeps the leading whitespace and only the first letter of "Ciao".
	assert.strictEqual(
		tokenizeLineToHTML(text, lineTokens, colorMap, 0, 3, 4, true),
		[
			'<div>',
			'<span style="color: #000000;">&#160; </span>',
			'<span style="color: #ff0000;font-style: italic;font-weight: bold;">C</span>',
			'</div>'
		].join('')
	);
});
294
295
test('tokenizeLineToHTML with tabs and non-zero startOffset #263387', () => {
	// Regression test for #263387: tab expansion must be based on the visual column
	// of the tab, which depends on everything before it on the line, including
	// characters that lie before `startOffset` and are therefore not rendered.

	/** Token metadata carrying only a foreground color id. */
	const foreground = (colorId: number): number => (colorId << MetadataConsts.FOREGROUND_OFFSET) >>> 0;

	// "\ta\tb" with tab size 4:
	//   offset 0: tab -> fills columns 0..3 (4 spaces)
	//   offset 1: 'a' -> column 4
	//   offset 2: tab -> fills columns 5..7 (3 spaces)
	//   offset 3: 'b' -> column 8
	const text = '\ta\tb';
	const lineTokens = new TestLineTokens([
		new TestLineToken(1, foreground(1)),
		new TestLineToken(2, foreground(3)),
		new TestLineToken(3, foreground(1)),
		new TestLineToken(4, foreground(4)),
	]);
	const colorMap = [null!, '#000000', '#ffffff', '#ff0000', '#00ff00'];

	/** Renders `text` from `startOffset` to the end of the line. */
	const renderFrom = (startOffset: number): string =>
		tokenizeLineToHTML(text, lineTokens, colorMap, startOffset, 4, 4, true);

	// Baseline: rendering the whole line expands the tabs to 4 and 3 spaces.
	assert.strictEqual(
		renderFrom(0),
		[
			'<div>',
			'<span style="color: #000000;">&#160; &#160; </span>',
			'<span style="color: #ff0000;">a</span>',
			'<span style="color: #000000;"> &#160; </span>',
			'<span style="color: #00ff00;">b</span>',
			'</div>'
		].join('')
	);

	// The regression: start at offset 2, i.e. skip the first tab and 'a'.
	// The second tab still sits at column 5 and must expand to 3 spaces.
	// Treating the skipped prefix as plain offsets (column 2) would yield only
	// 2 spaces, i.e. `4 - (2 + 0) % 4`.
	assert.strictEqual(
		renderFrom(2),
		[
			'<div>',
			'<span style="color: #000000;">&#160; &#160;</span>',
			'<span style="color: #00ff00;">b</span>',
			'</div>'
		].join('')
	);
});
363
364
});
365
366
/**
 * Minimal language used by the tests in this file.
 *
 * Constructing it registers the language id and an encoded tokenizer for it; both
 * registrations are undone when the instance is disposed.
 *
 * The tokenizer splits a line into runs of '.' characters (color id 7) and runs of
 * anything else (color id 9).
 */
class Mode extends Disposable {

	public readonly languageId = 'textToHtmlTokenizerMode';

	constructor(
		@ILanguageService languageService: ILanguageService
	) {
		super();
		this._register(languageService.registerLanguage({ id: this.languageId }));
		this._register(TokenizationRegistry.register(this.languageId, {
			// The tokenizer is stateless, so no real state object is provided.
			getInitialState: (): IState => null!,
			// Only the encoded tokenizer is implemented.
			tokenize: undefined!,
			tokenizeEncoded: (line: string, hasEOL: boolean, state: IState): EncodedTokenizationResult => {
				// Flat list of (start offset, metadata) pairs, one pair per color run.
				const encoded: number[] = [];
				let previousColor = -1 as ColorId;
				for (let offset = 0; offset < line.length; offset++) {
					const color = (line.charAt(offset) === '.' ? 7 : 9) as ColorId;
					if (color === previousColor) {
						// Still inside the current run.
						continue;
					}
					// A new run starts here; record its offset and foreground-only metadata.
					encoded.push(offset, (color << MetadataConsts.FOREGROUND_OFFSET) >>> 0);
					previousColor = color;
				}

				return new EncodedTokenizationResult(new Uint32Array(encoded), [], null!);
			}
		}));
	}
}
401
402