Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/model/textModelTokens.ts
3294 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { IdleDeadline, runWhenGlobalIdle } from '../../../base/common/async.js';
7
import { BugIndicatingError, onUnexpectedError } from '../../../base/common/errors.js';
8
import { setTimeout0 } from '../../../base/common/platform.js';
9
import { StopWatch } from '../../../base/common/stopwatch.js';
10
import { countEOL } from '../core/misc/eolCounter.js';
11
import { LineRange } from '../core/ranges/lineRange.js';
12
import { OffsetRange } from '../core/ranges/offsetRange.js';
13
import { Position } from '../core/position.js';
14
import { StandardTokenType } from '../encodedTokenAttributes.js';
15
import { EncodedTokenizationResult, IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport } from '../languages.js';
16
import { nullTokenizeEncoded } from '../languages/nullTokenize.js';
17
import { ITextModel } from '../model.js';
18
import { FixedArray } from './fixedArray.js';
19
import { IModelContentChange } from './mirrorTextModel.js';
20
import { ContiguousMultilineTokensBuilder } from '../tokens/contiguousMultilineTokensBuilder.js';
21
import { LineTokens } from '../tokens/lineTokens.js';
22
23
/**
 * Numeric thresholds used by the tokenizers in this file.
 */
const enum Constants {
	/**
	 * Exclusive upper bound on the length of the first invalid line for
	 * `isCheapToTokenize` to still report that tokenizing it is cheap.
	 */
	CHEAP_TOKENIZATION_LENGTH_LIMIT = 2048
}
26
27
/**
 * Pairs a tokenization support with a store of per-line end states and
 * remembers the support's initial state, which is used as the start state
 * of line 1.
 */
export class TokenizerWithStateStore<TState extends IState = IState> {
	/** Initial state, fetched once from `tokenizationSupport` during construction. */
	private readonly initialState: TState;

	/** Holds the per-line end states and tracks which of them are invalid. */
	public readonly store: TrackingTokenizationStateStore<TState>;

	/**
	 * @param lineCount Number of lines the state store is created for.
	 * @param tokenizationSupport Tokenizer that supplies the initial state.
	 */
	constructor(
		lineCount: number,
		public readonly tokenizationSupport: ITokenizationSupport
	) {
		this.initialState = tokenizationSupport.getInitialState() as TState;
		this.store = new TrackingTokenizationStateStore<TState>(lineCount);
	}

	/**
	 * Looks up the state in which tokenization of `lineNumber` has to start.
	 *
	 * @returns The start state as reported by the store, or `null` if the store has none.
	 */
	public getStartState(lineNumber: number): TState | null {
		const { store, initialState } = this;
		return store.getStartState(lineNumber, initialState);
	}

	/**
	 * @returns The first line whose end state is invalid together with its
	 * start state, or `null` if the store reports no invalid line.
	 */
	public getFirstInvalidLine(): { lineNumber: number; startState: TState } | null {
		const { store, initialState } = this;
		return store.getFirstInvalidLine(initialState);
	}
}
48
49
/**
 * A {@link TokenizerWithStateStore} that additionally knows the text model it
 * tokenizes and the codec used to encode language ids.
 */
export class TokenizerWithStateStoreAndTextModel<TState extends IState = IState> extends TokenizerWithStateStore<TState> {
	/**
	 * @param lineCount Number of lines the state store is created for.
	 * @param tokenizationSupport Tokenizer used for every line.
	 * @param _textModel Model that line contents and the language id are read from.
	 * @param _languageIdCodec Codec handed to `safeTokenize` and `LineTokens`.
	 */
	constructor(
		lineCount: number,
		tokenizationSupport: ITokenizationSupport,
		public readonly _textModel: ITextModel,
		public readonly _languageIdCodec: ILanguageIdCodec
	) {
		super(lineCount, tokenizationSupport);
	}

	/**
	 * Tokenizes invalid lines one by one, in the order reported by
	 * `getFirstInvalidLine`, until no invalid line at or before `lineNumber`
	 * remains. Tokens are added to `builder` and end states are stored.
	 */
	public updateTokensUntilLine(builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
		const languageId = this._textModel.getLanguageId();

		for (
			let pending = this.getFirstInvalidLine();
			pending && pending.lineNumber <= lineNumber;
			pending = this.getFirstInvalidLine()
		) {
			const content = this._textModel.getLineContent(pending.lineNumber);
			const tokenized = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, content, true, pending.startState);

			builder.add(pending.lineNumber, tokenized.tokens);
			this.store.setEndState(pending.lineNumber, tokenized.endState as TState);
		}
	}

	/**
	 * Computes the standard token type found at `position` if `character`
	 * were inserted there. Assumes state is up to date.
	 *
	 * @returns `StandardTokenType.Other` when the line has no start state or
	 * the tokenized line has no tokens.
	 */
	public getTokenTypeIfInsertingCharacter(position: Position, character: string): StandardTokenType {
		// TODO@hediet: use tokenizeLineWithEdit
		const startState = this.getStartState(position.lineNumber);
		if (!startState) {
			return StandardTokenType.Other;
		}

		const languageId = this._textModel.getLanguageId();
		const currentContent = this._textModel.getLineContent(position.lineNumber);

		// Build the line as it would look with `character` inserted at the position.
		const insertOffset = position.column - 1;
		const text = `${currentContent.substring(0, insertOffset)}${character}${currentContent.substring(insertOffset)}`;

		const { tokens } = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, text, true, startState);
		const lineTokens = new LineTokens(tokens, text, this._languageIdCodec);

		return lineTokens.getCount() === 0
			? StandardTokenType.Other
			: lineTokens.getStandardTokenType(lineTokens.findTokenIndexAtOffset(insertOffset));
	}

	/**
	 * Tokenizes `lines` as consecutive lines, starting from the start state of
	 * `lineNumber`. Nothing is written to the store. Assumes state is up to date.
	 *
	 * @returns One `LineTokens` per input line, or `null` if `lineNumber` has no start state.
	 */
	public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
		const startState: IState | null = this.getStartState(lineNumber);
		if (!startState) {
			return null;
		}

		const languageId = this._textModel.getLanguageId();
		const tokenizedLines: LineTokens[] = [];

		let carriedState = startState;
		for (const content of lines) {
			const tokenized = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, content, true, carriedState);
			tokenizedLines.push(new LineTokens(tokenized.tokens, content, this._languageIdCodec));
			carriedState = tokenized.endState;
		}

		return tokenizedLines;
	}

	/**
	 * @returns `true` if `lineNumber` lies before the first line with an invalid end state.
	 */
	public hasAccurateTokensForLine(lineNumber: number): boolean {
		return lineNumber < this.store.getFirstInvalidEndStateLineNumberOrMax();
	}

	/**
	 * @returns `true` if the line lies before the first invalid line, or if it
	 * is the first invalid line and shorter than
	 * `Constants.CHEAP_TOKENIZATION_LENGTH_LIMIT`.
	 */
	public isCheapToTokenize(lineNumber: number): boolean {
		const firstInvalid = this.store.getFirstInvalidEndStateLineNumberOrMax();
		return lineNumber < firstInvalid
			|| (lineNumber === firstInvalid
				&& this._textModel.getLineLength(lineNumber) < Constants.CHEAP_TOKENIZATION_LENGTH_LIMIT);
	}

	/**
	 * Produces tokens for the lines `startLineNumber..endLineNumber`.
	 * The result is not cached.
	 *
	 * @returns `heuristicTokens: true` only when the tokens were computed from
	 * a guessed start state rather than from stored states.
	 */
	public tokenizeHeuristically(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): { heuristicTokens: boolean } {
		const firstInvalid = this.store.getFirstInvalidEndStateLineNumberOrMax();

		if (endLineNumber <= firstInvalid) {
			// nothing to do
			return { heuristicTokens: false };
		}

		if (startLineNumber <= firstInvalid) {
			// tokenization has reached the viewport start...
			this.updateTokensUntilLine(builder, endLineNumber);
			return { heuristicTokens: false };
		}

		let carriedState = this.guessStartState(startLineNumber);
		const languageId = this._textModel.getLanguageId();

		for (let current = startLineNumber; current <= endLineNumber; current++) {
			const content = this._textModel.getLineContent(current);
			const tokenized = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, content, true, carriedState);
			builder.add(current, tokenized.tokens);
			carriedState = tokenized.endState;
		}

		return { heuristicTokens: true };
	}

	/**
	 * Guesses a start state for `lineNumber` by tokenizing the lines returned
	 * by `findLikelyRelevantLines`, beginning from the state that function
	 * found or, failing that, from the tokenizer's initial state.
	 */
	private guessStartState(lineNumber: number): IState {
		const guess = findLikelyRelevantLines(this._textModel, lineNumber, this);
		let carriedState: IState = guess.initialState || this.tokenizationSupport.getInitialState();

		const languageId = this._textModel.getLanguageId();
		for (const content of guess.likelyRelevantLines) {
			carriedState = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, content, false, carriedState).endState;
		}
		return carriedState;
	}
}
186
187
/**
 * Walks upwards from `lineNumber` and collects every line whose first
 * non-whitespace column is smaller than that of the previously collected line
 * (initially: of `lineNumber` itself). Whitespace-only lines are skipped.
 *
 * The walk stops when column 1 or line 1 has been reached, or as soon as
 * `store` returns a start state for a collected line.
 *
 * @returns The collected lines in document order and, if found, the start
 * state of the first of them.
 */
export function findLikelyRelevantLines(model: ITextModel, lineNumber: number, store?: TokenizerWithStateStore): { likelyRelevantLines: string[]; initialState?: IState } {
	const collected: string[] = [];
	let knownState: IState | null | undefined = null;

	let indentColumn = model.getLineFirstNonWhitespaceColumn(lineNumber);
	let candidate = lineNumber - 1;

	while (indentColumn > 1 && candidate >= 1) {
		const current = candidate--;
		const column = model.getLineFirstNonWhitespaceColumn(current);

		// A column of 0 denotes a line full of whitespace; such lines are ignored.
		if (column !== 0 && column < indentColumn) {
			collected.push(model.getLineContent(current));
			indentColumn = column;

			knownState = store?.getStartState(current);
			if (knownState) {
				break;
			}
		}
	}

	// Lines were gathered bottom-up; hand them back top-down.
	collected.reverse();
	return { likelyRelevantLines: collected, initialState: knownState ?? undefined };
}
210
211
/**
 * Stores the end state of every line and tracks which end states are invalid.
 *
 * **Invariant:**
 * If the text model is retokenized from line 1 to {@link getFirstInvalidEndStateLineNumber}() - 1,
 * then the recomputed end state for line l will be equal to {@link getEndState}(l).
 */
export class TrackingTokenizationStateStore<TState extends IState> {
	/** The end states themselves. */
	private readonly _tokenizationStateStore = new TokenizationStateStore<TState>();
	/** Line numbers whose end state is currently considered invalid. */
	private readonly _invalidEndStatesLineNumbers = new RangePriorityQueueImpl();

	/**
	 * @param lineCount Number of lines; initially all of them (1..lineCount) are invalid.
	 */
	constructor(private lineCount: number) {
		this._invalidEndStatesLineNumbers.addRange(new OffsetRange(1, lineCount + 1));
	}

	/**
	 * @returns The stored end state of `lineNumber`, or `null` if none is stored.
	 */
	public getEndState(lineNumber: number): TState | null {
		return this._tokenizationStateStore.getEndState(lineNumber);
	}

	/**
	 * Stores `state` as the end state of `lineNumber` and marks that line as valid.
	 *
	 * @returns if the end state has changed.
	 * @throws BugIndicatingError if `state` is null/undefined.
	 */
	public setEndState(lineNumber: number, state: TState): boolean {
		if (!state) {
			throw new BugIndicatingError('Cannot set null/undefined state');
		}

		this._invalidEndStatesLineNumbers.delete(lineNumber);
		const changed = this._tokenizationStateStore.setEndState(lineNumber, state);
		if (changed && lineNumber < this.lineCount) {
			// because the state changed, we cannot trust the next state anymore and have to invalidate it.
			this._invalidEndStatesLineNumbers.addRange(new OffsetRange(lineNumber + 1, lineNumber + 2));
		}

		return changed;
	}

	/**
	 * Applies an edit that replaces the lines in `range` with `newLineCount`
	 * lines: adjusts the line count, the stored states and the invalid ranges.
	 */
	public acceptChange(range: LineRange, newLineCount: number): void {
		this.lineCount += newLineCount - range.length;
		this._tokenizationStateStore.acceptChange(range, newLineCount);
		this._invalidEndStatesLineNumbers.addRangeAndResize(new OffsetRange(range.startLineNumber, range.endLineNumberExclusive), newLineCount);
	}

	/**
	 * Applies each content change in order via {@link acceptChange}.
	 */
	public acceptChanges(changes: IModelContentChange[]) {
		for (const c of changes) {
			const [eolCount] = countEOL(c.text);
			this.acceptChange(new LineRange(c.range.startLineNumber, c.range.endLineNumber + 1), eolCount + 1);
		}
	}

	/**
	 * Marks the end states of all lines in `range` as invalid.
	 */
	public invalidateEndStateRange(range: LineRange): void {
		this._invalidEndStatesLineNumbers.addRange(new OffsetRange(range.startLineNumber, range.endLineNumberExclusive));
	}

	/**
	 * @returns The smallest line number with an invalid end state, or `null` if there is none.
	 */
	public getFirstInvalidEndStateLineNumber(): number | null { return this._invalidEndStatesLineNumbers.min; }

	/**
	 * Like {@link getFirstInvalidEndStateLineNumber}, but yields
	 * `Number.MAX_SAFE_INTEGER` when no end state is invalid.
	 */
	public getFirstInvalidEndStateLineNumberOrMax(): number {
		// `??` rather than `||`: only the "no invalid line" case (`null`) may fall back to the
		// maximum, which keeps this in agreement with the `=== null` checks used elsewhere in this class.
		return this.getFirstInvalidEndStateLineNumber() ?? Number.MAX_SAFE_INTEGER;
	}

	/**
	 * @returns `true` if no line has an invalid end state.
	 */
	public allStatesValid(): boolean { return this._invalidEndStatesLineNumbers.min === null; }

	/**
	 * @returns `initialState` for line 1, otherwise the stored end state of the
	 * previous line (which may be `null`).
	 */
	public getStartState(lineNumber: number, initialState: TState): TState | null {
		if (lineNumber === 1) { return initialState; }
		return this.getEndState(lineNumber - 1);
	}

	/**
	 * @returns The first line with an invalid end state together with its
	 * start state, or `null` if all states are valid.
	 * @throws BugIndicatingError if that line has no start state.
	 */
	public getFirstInvalidLine(initialState: TState): { lineNumber: number; startState: TState } | null {
		const lineNumber = this.getFirstInvalidEndStateLineNumber();
		if (lineNumber === null) {
			return null;
		}
		const startState = this.getStartState(lineNumber, initialState);
		if (!startState) {
			throw new BugIndicatingError('Start state must be defined');
		}

		return { lineNumber, startState };
	}
}
289
290
/**
 * Keeps one end state (or `null`) per line number.
 */
export class TokenizationStateStore<TState extends IState> {
	/** End states indexed by line number; `null` is the default value. */
	private readonly _lineEndStates = new FixedArray<TState | null>(null);

	/**
	 * @returns The stored end state of `lineNumber`, or `null`.
	 */
	public getEndState(lineNumber: number): TState | null {
		return this._lineEndStates.get(lineNumber);
	}

	/**
	 * Stores `state` for `lineNumber` unless an equal state is already stored.
	 *
	 * @returns `true` if the stored state was replaced, `false` if it was already equal.
	 */
	public setEndState(lineNumber: number, state: TState): boolean {
		const previous = this._lineEndStates.get(lineNumber);
		const unchanged = Boolean(previous && previous.equals(state));
		if (!unchanged) {
			this._lineEndStates.set(lineNumber, state);
		}
		return !unchanged;
	}

	/**
	 * Applies an edit that replaces the lines in `range` with `newLineCount` lines.
	 */
	public acceptChange(range: LineRange, newLineCount: number): void {
		const oldLength = range.length;

		// Keep the last state, even though it is unrelated.
		// But if the new state happens to agree with this last state, then we know we can stop tokenizing.
		const kept = newLineCount > 0 && oldLength > 0 ? 1 : 0;

		this._lineEndStates.replace(range.startLineNumber, oldLength - kept, newLineCount - kept);
	}

	/**
	 * Applies each content change in order via {@link acceptChange}.
	 */
	public acceptChanges(changes: IModelContentChange[]) {
		for (const change of changes) {
			const [eolCount] = countEOL(change.text);
			const replaced = new LineRange(change.range.startLineNumber, change.range.endLineNumber + 1);
			this.acceptChange(replaced, eolCount + 1);
		}
	}
}
326
327
/**
 * A priority queue over numbers that are added in whole ranges.
 */
interface RangePriorityQueue {
	/** The smallest number in the queue, or `null` if the queue is empty. */
	readonly min: number | null;

	/** Removes the smallest number and returns it, or returns `null` if the queue is empty. */
	removeMin(): number | null;

	/** Adds every number of `range` to the queue. */
	addRange(range: OffsetRange): void;

	/** Adds `range` and resizes it to `newLength`. */
	addRangeAndResize(range: OffsetRange, newLength: number): void;
}
335
336
/**
 * {@link RangePriorityQueue} backed by an array of ranges; the first array
 * entry provides the minimum.
 */
export class RangePriorityQueueImpl implements RangePriorityQueue {
	private readonly _ranges: OffsetRange[] = [];

	/**
	 * @returns The internal range array itself (not a copy).
	 */
	public getRanges(): OffsetRange[] {
		return this._ranges;
	}

	/** The start of the first range, or `null` if there are no ranges. */
	public get min(): number | null {
		return this._ranges.length === 0 ? null : this._ranges[0].start;
	}

	/**
	 * Removes the start of the first range from the queue.
	 *
	 * @returns The removed number, or `null` if there are no ranges.
	 */
	public removeMin(): number | null {
		if (this._ranges.length === 0) {
			return null;
		}

		const first = this._ranges[0];
		const remainderStart = first.start + 1;
		if (remainderStart === first.endExclusive) {
			// The range held a single number: drop it entirely.
			this._ranges.shift();
		} else {
			this._ranges[0] = new OffsetRange(remainderStart, first.endExclusive);
		}
		return first.start;
	}

	/**
	 * Removes `value` from the first range that contains it, shrinking or
	 * splitting that range as needed. Does nothing if no range contains it.
	 */
	public delete(value: number): void {
		const idx = this._ranges.findIndex(r => r.contains(value));
		if (idx === -1) {
			return;
		}

		const hit = this._ranges[idx];
		const remainder: OffsetRange[] = [];
		if (hit.start !== value) {
			// part of the range in front of `value`
			remainder.push(new OffsetRange(hit.start, value));
		}
		if (hit.endExclusive !== value + 1) {
			// part of the range behind `value`
			remainder.push(new OffsetRange(value + 1, hit.endExclusive));
		}
		this._ranges.splice(idx, 1, ...remainder);
	}

	/** Adds `range` by delegating to `OffsetRange.addRange` on the internal array. */
	public addRange(range: OffsetRange): void {
		OffsetRange.addRange(range, this._ranges);
	}

	/**
	 * Adds `range`, merging it with every stored range it touches, resizes the
	 * result by `newLength - range.length` and shifts all later ranges by the
	 * same amount.
	 */
	public addRangeAndResize(range: OffsetRange, newLength: number): void {
		const ranges = this._ranges;

		// First stored range that ends at or after the start of `range`.
		let firstTouching = 0;
		while (firstTouching < ranges.length && range.start > ranges[firstTouching].endExclusive) {
			firstTouching++;
		}

		// First stored range that starts strictly after the end of `range`.
		let firstAfter = firstTouching;
		while (firstAfter < ranges.length && range.endExclusive >= ranges[firstAfter].start) {
			firstAfter++;
		}

		const delta = newLength - range.length;
		for (let i = firstAfter; i < ranges.length; i++) {
			ranges[i] = ranges[i].delta(delta);
		}

		const touchingCount = firstAfter - firstTouching;
		let merged: OffsetRange;
		if (touchingCount === 0) {
			merged = new OffsetRange(range.start, range.start + newLength);
		} else {
			const start = Math.min(range.start, ranges[firstTouching].start);
			const endEx = Math.max(range.endExclusive, ranges[firstAfter - 1].endExclusive);
			merged = new OffsetRange(start, endEx + delta);
		}

		// Replace the touched ranges by the merged one; an empty result is not stored.
		if (merged.isEmpty) {
			ranges.splice(firstTouching, touchingCount);
		} else {
			ranges.splice(firstTouching, touchingCount, merged);
		}
	}

	/** Joins the string forms of all ranges with `' + '`. */
	toString() {
		return this._ranges.map(range => range.toString()).join(' + ');
	}
}
424
425
426
/**
 * Tokenizes `text` with `tokenizationSupport`, starting from a clone of
 * `state`. If no tokenization support is given, or it throws (the error is
 * reported via `onUnexpectedError`) or yields no result, the null tokenizer
 * is used instead. The resulting tokens are converted to end offsets.
 */
function safeTokenize(languageIdCodec: ILanguageIdCodec, languageId: string, tokenizationSupport: ITokenizationSupport | null, text: string, hasEOL: boolean, state: IState): EncodedTokenizationResult {
	const tryTokenize = (): EncodedTokenizationResult | null => {
		if (!tokenizationSupport) {
			return null;
		}
		try {
			return tokenizationSupport.tokenizeEncoded(text, hasEOL, state.clone());
		} catch (error) {
			onUnexpectedError(error);
			return null;
		}
	};

	const result = tryTokenize() || nullTokenizeEncoded(languageIdCodec.encodeLanguageId(languageId), state);

	LineTokens.convertToEndOffset(result.tokens, text.length);
	return result;
}
444
445
/**
 * Tokenizes the invalid lines of a text model in small slices during idle
 * time and pushes the resulting tokens into a background tokenization store.
 */
export class DefaultBackgroundTokenizer implements IBackgroundTokenizer {
	private _isDisposed = false;

	/**
	 * @param _tokenizerWithStateStore Tokenizer and state store that is driven by this class.
	 * @param _backgroundTokenStore Receiver of the computed tokens and of the "finished" notification.
	 */
	constructor(
		private readonly _tokenizerWithStateStore: TokenizerWithStateStoreAndTextModel,
		private readonly _backgroundTokenStore: IBackgroundTokenizationStore,
	) {
	}

	/** Marks this tokenizer as disposed; no further work is scheduled or executed afterwards. */
	public dispose(): void {
		this._isDisposed = true;
	}

	/** Schedules background tokenization if there is something to tokenize. */
	public handleChanges(): void {
		this._beginBackgroundTokenization();
	}

	/** `true` while an idle callback has been requested but has not run yet. */
	private _isScheduled = false;

	/**
	 * Requests an idle callback that tokenizes in the background, unless this
	 * tokenizer is disposed, a callback is already pending, the model is not
	 * attached to an editor, or all states are valid.
	 */
	private _beginBackgroundTokenization(): void {
		// Check the disposed flag first so that a disposed tokenizer neither touches the model nor schedules work.
		if (this._isDisposed || this._isScheduled || !this._tokenizerWithStateStore._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
			return;
		}

		this._isScheduled = true;
		runWhenGlobalIdle((deadline) => {
			this._isScheduled = false;

			this._backgroundTokenizeWithDeadline(deadline);
		});
	}

	/**
	 * Tokenize until the deadline occurs, but try to yield every 1-2ms.
	 */
	private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {
		// Read the time remaining from the `deadline` immediately because it is unclear
		// if the `deadline` object will be valid after execution leaves this function.
		const endTime = Date.now() + deadline.timeRemaining();

		const execute = () => {
			if (this._isDisposed || !this._tokenizerWithStateStore._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
				// disposed in the meantime or detached or finished
				return;
			}

			this._backgroundTokenizeForAtLeast1ms();

			if (Date.now() < endTime) {
				// There is still time before reaching the deadline, so yield to the browser and then
				// continue execution
				setTimeout0(execute);
			} else {
				// The deadline has been reached, so schedule a new idle callback if necessary
				this._beginBackgroundTokenization();
			}
		};
		execute();
	}

	/**
	 * Tokenize for at least 1ms.
	 */
	private _backgroundTokenizeForAtLeast1ms(): void {
		const lineCount = this._tokenizerWithStateStore._textModel.getLineCount();
		const builder = new ContiguousMultilineTokensBuilder();
		const sw = StopWatch.create(false);

		do {
			if (sw.elapsed() > 1) {
				// the comparison is intentionally > 1 and not >= 1 to ensure that
				// a full millisecond has elapsed, given how microseconds are rounded
				// to milliseconds
				break;
			}

			const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);

			if (tokenizedLineNumber >= lineCount) {
				break;
			}
		} while (this._hasLinesToTokenize());

		this._backgroundTokenStore.setTokens(builder.finalize());
		this.checkFinished();
	}

	/**
	 * @returns `true` if at least one line still has an invalid end state.
	 */
	private _hasLinesToTokenize(): boolean {
		return !this._tokenizerWithStateStore.store.allStatesValid();
	}

	/**
	 * Tokenizes up to and including the first invalid line.
	 *
	 * @returns The number of that line, or `lineCount + 1` if there is no invalid line.
	 */
	private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
		const firstInvalidLine = this._tokenizerWithStateStore.getFirstInvalidLine();
		if (!firstInvalidLine) {
			return this._tokenizerWithStateStore._textModel.getLineCount() + 1;
		}
		this._tokenizerWithStateStore.updateTokensUntilLine(builder, firstInvalidLine.lineNumber);
		return firstInvalidLine.lineNumber;
	}

	/**
	 * Notifies the background token store that tokenization has finished if
	 * this tokenizer is not disposed and all states are valid.
	 */
	public checkFinished(): void {
		if (this._isDisposed) {
			return;
		}
		if (this._tokenizerWithStateStore.store.allStatesValid()) {
			this._backgroundTokenStore.backgroundTokenizationFinished();
		}
	}

	/**
	 * Invalidates the end states of the lines `startLineNumber` (inclusive) to
	 * `endLineNumberExclusive`. This method itself does not schedule any work.
	 */
	public requestTokens(startLineNumber: number, endLineNumberExclusive: number): void {
		this._tokenizerWithStateStore.store.invalidateEndStateRange(new LineRange(startLineNumber, endLineNumberExclusive));
	}
}
560
561