Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/model/tokens/tokenizationTextModelPart.ts
5237 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../../base/common/charCode.js';
7
import { BugIndicatingError } from '../../../../base/common/errors.js';
8
import { Emitter, Event } from '../../../../base/common/event.js';
9
import { countEOL } from '../../core/misc/eolCounter.js';
10
import { IPosition, Position } from '../../core/position.js';
11
import { Range } from '../../core/range.js';
12
import { IWordAtPosition, getWordAtText } from '../../core/wordHelper.js';
13
import { StandardTokenType } from '../../encodedTokenAttributes.js';
14
import { ILanguageService } from '../../languages/language.js';
15
import { ILanguageConfigurationService, LanguageConfigurationServiceChangeEvent, ResolvedLanguageConfiguration } from '../../languages/languageConfigurationRegistry.js';
16
import { BracketPairsTextModelPart } from '../bracketPairsTextModelPart/bracketPairsImpl.js';
17
import { TextModel } from '../textModel.js';
18
import { TextModelPart } from '../textModelPart.js';
19
import { AbstractSyntaxTokenBackend, AttachedViews } from './abstractSyntaxTokenBackend.js';
20
import { TreeSitterSyntaxTokenBackend } from './treeSitter/treeSitterSyntaxTokenBackend.js';
21
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent, IModelFontTokensChangedEvent } from '../../textModelEvents.js';
22
import { ITokenizationTextModelPart } from '../../tokenizationTextModelPart.js';
23
import { LineTokens } from '../../tokens/lineTokens.js';
24
import { SparseMultilineTokens } from '../../tokens/sparseMultilineTokens.js';
25
import { SparseTokensStore } from '../../tokens/sparseTokensStore.js';
26
import { IInstantiationService } from '../../../../platform/instantiation/common/instantiation.js';
27
import { TokenizerSyntaxTokenBackend } from './tokenizerSyntaxTokenBackend.js';
28
import { ITreeSitterLibraryService } from '../../services/treeSitter/treeSitterLibraryService.js';
29
import { derived, IObservable, ISettableObservable, observableValue } from '../../../../base/common/observable.js';
30
31
/**
 * Text model part that owns tokenization for one `TextModel`.
 *
 * It picks the syntax token backend (tree-sitter based or tokenizer based) for the current
 * language id, overlays semantic tokens on top of the syntactic tokens, tracks the model's
 * language id, and exposes the related change events.
 */
export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
	/** Semantic tokens; merged over the syntactic tokens in `getLineTokens`. */
	private readonly _semanticTokens: SparseTokensStore;

	private readonly _onDidChangeLanguage: Emitter<IModelLanguageChangedEvent>;
	/** Fired by `setLanguageId` after the language id changed. */
	public readonly onDidChangeLanguage: Event<IModelLanguageChangedEvent>;

	private readonly _onDidChangeLanguageConfiguration: Emitter<IModelLanguageConfigurationChangedEvent>;
	/** Fired when the language changes or when a configuration change affects the current language. */
	public readonly onDidChangeLanguageConfiguration: Event<IModelLanguageConfigurationChangedEvent>;

	private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent>;
	/** Fired when syntactic or semantic tokens changed (suppressed while the text model is disposing). */
	public readonly onDidChangeTokens: Event<IModelTokensChangedEvent>;

	// Created exactly once, here. The constructor must not re-create this emitter: doing so would
	// register a second emitter and leave this one registered but unused.
	private readonly _onDidChangeFontTokens: Emitter<IModelFontTokensChangedEvent> = this._register(new Emitter<IModelFontTokensChangedEvent>());
	/** Forwards the active backend's font token changes (suppressed while the text model is disposing). */
	public readonly onDidChangeFontTokens: Event<IModelFontTokensChangedEvent> = this._onDidChangeFontTokens.event;

	/** The currently active syntax token backend; re-derived when the backend choice changes. */
	public readonly tokens: IObservable<AbstractSyntaxTokenBackend>;
	/** Whether the tree-sitter library service reports support for the current language id. */
	private readonly _useTreeSitter: IObservable<boolean>;
	/** Observable mirror of `_languageId`; kept in sync by `setLanguageId`. */
	private readonly _languageIdObs: ISettableObservable<string>;

	/**
	 * @param _textModel The text model this part belongs to.
	 * @param _bracketPairsTextModelPart Notified about token, tokenization-state and language changes.
	 * @param _languageId The initial language id.
	 * @param _attachedViews Passed on to the token backends.
	 */
	constructor(
		private readonly _textModel: TextModel,
		private readonly _bracketPairsTextModelPart: BracketPairsTextModelPart,
		private _languageId: string,
		private readonly _attachedViews: AttachedViews,
		@ILanguageService private readonly _languageService: ILanguageService,
		@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@ITreeSitterLibraryService private readonly _treeSitterLibraryService: ITreeSitterLibraryService,
	) {
		super();

		this._languageIdObs = observableValue(this, this._languageId);

		this._useTreeSitter = derived(this, reader => {
			const languageId = this._languageIdObs.read(reader);
			return this._treeSitterLibraryService.supportsLanguage(languageId, reader);
		});

		this.tokens = derived(this, reader => {
			// The backend and all of its listeners are added to `reader.store`, so they are tied
			// to this particular derivation of `tokens`.
			let tokens: AbstractSyntaxTokenBackend;
			if (this._useTreeSitter.read(reader)) {
				tokens = reader.store.add(this._instantiationService.createInstance(
					TreeSitterSyntaxTokenBackend,
					this._languageIdObs,
					this._languageService.languageIdCodec,
					this._textModel,
					this._attachedViews.visibleLineRanges
				));
			} else {
				tokens = reader.store.add(new TokenizerSyntaxTokenBackend(this._languageService.languageIdCodec, this._textModel, () => this._languageId, this._attachedViews));
			}

			reader.store.add(tokens.onDidChangeTokens(e => {
				this._emitModelTokensChangedEvent(e);
			}));
			reader.store.add(tokens.onDidChangeFontTokens(e => {
				if (!this._textModel._isDisposing()) {
					this._onDidChangeFontTokens.fire(e);
				}
			}));

			reader.store.add(tokens.onDidChangeBackgroundTokenizationState(() => {
				this._bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
			}));
			return tokens;
		});

		// `hadTokens` is false only for the first callback invocation, so the reset below is
		// applied to every backend except the first one.
		let hadTokens = false;
		this.tokens.recomputeInitiallyAndOnChange(this._store, value => {
			if (hadTokens) {
				// We need to reset the tokenization, as the new token provider otherwise won't have a chance to provide tokens until some action happens in the editor.
				// TODO@hediet: Look into why this is needed.
				value.todo_resetTokenization();
			}
			hadTokens = true;
		});

		// NOTE(review): these members are assigned only after `tokens` has been set up above, so a
		// backend that raised a token change synchronously during construction would reach
		// `_emitModelTokensChangedEvent` before `_onDidChangeTokens` exists — confirm this cannot happen.
		this._semanticTokens = new SparseTokensStore(this._languageService.languageIdCodec);
		this._onDidChangeLanguage = this._register(new Emitter<IModelLanguageChangedEvent>());
		this.onDidChangeLanguage = this._onDidChangeLanguage.event;
		this._onDidChangeLanguageConfiguration = this._register(new Emitter<IModelLanguageConfigurationChangedEvent>());
		this.onDidChangeLanguageConfiguration = this._onDidChangeLanguageConfiguration.event;
		this._onDidChangeTokens = this._register(new Emitter<IModelTokensChangedEvent>());
		this.onDidChangeTokens = this._onDidChangeTokens.event;
	}

	/**
	 * @returns `true` if the language, language configuration or tokens event has a listener.
	 */
	_hasListeners(): boolean {
		// Note: _onDidChangeFontTokens is intentionally excluded because it's an internal event
		// that TokenizationFontDecorationProvider subscribes to during TextModel construction
		return (this._onDidChangeLanguage.hasListeners()
			|| this._onDidChangeLanguageConfiguration.hasListeners()
			|| this._onDidChangeTokens.hasListeners());
	}

	/**
	 * Fires `onDidChangeLanguageConfiguration` if the service-level change affects the current language.
	 */
	public handleLanguageConfigurationServiceChange(e: LanguageConfigurationServiceChangeEvent): void {
		if (e.affects(this._languageId)) {
			this._onDidChangeLanguageConfiguration.fire({});
		}
	}

	/**
	 * Applies a content change to the semantic token store and forwards it to the active backend.
	 */
	public handleDidChangeContent(e: IModelContentChangedEvent): void {
		if (e.isFlush) {
			this._semanticTokens.flush();
		} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
			for (const c of e.changes) {
				const [eolCount, firstLineLength, lastLineLength] = countEOL(c.text);

				this._semanticTokens.acceptEdit(
					c.range,
					eolCount,
					firstLineLength,
					lastLineLength,
					// First char code of the inserted text, or Null for a pure deletion.
					c.text.length > 0 ? c.text.charCodeAt(0) : CharCode.Null
				);
			}
		}

		// The backend is notified for every kind of change, including flush and EOL changes.
		this.tokens.get().handleDidChangeContent(e);
	}

	/**
	 * Forwards an attached-state change to the active backend.
	 */
	public handleDidChangeAttached(): void {
		this.tokens.get().handleDidChangeAttached();
	}

	/**
	 * Includes grammar and semantic tokens.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public getLineTokens(lineNumber: number): LineTokens {
		this.validateLineNumber(lineNumber);
		const syntacticTokens = this.tokens.get().getLineTokens(lineNumber);
		return this._semanticTokens.addSparseTokens(lineNumber, syntacticTokens);
	}

	/**
	 * Notifies the bracket pairs part and fires `onDidChangeTokens`, unless the text model is disposing.
	 */
	private _emitModelTokensChangedEvent(e: IModelTokensChangedEvent): void {
		if (!this._textModel._isDisposing()) {
			this._bracketPairsTextModelPart.handleDidChangeTokens(e);
			this._onDidChangeTokens.fire(e);
		}
	}

	// #region Grammar Tokens

	/**
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	private validateLineNumber(lineNumber: number): void {
		if (lineNumber < 1 || lineNumber > this._textModel.getLineCount()) {
			throw new BugIndicatingError('Illegal value for lineNumber');
		}
	}

	/** Delegates to the active backend's `hasTokens`. */
	public get hasTokens(): boolean {
		return this.tokens.get().hasTokens;
	}

	/** Asks the active backend to reset its tokenization. */
	public resetTokenization() {
		this.tokens.get().todo_resetTokenization();
	}

	/** Delegates to the active backend's `backgroundTokenizationState`. */
	public get backgroundTokenizationState() {
		return this.tokens.get().backgroundTokenizationState;
	}

	/**
	 * Delegates to the active backend after validating the line number.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public forceTokenization(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().forceTokenization(lineNumber);
	}

	/**
	 * Delegates to the active backend after validating the line number.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public hasAccurateTokensForLine(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().hasAccurateTokensForLine(lineNumber);
	}

	/**
	 * Delegates to the active backend after validating the line number.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public isCheapToTokenize(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().isCheapToTokenize(lineNumber);
	}

	/**
	 * Delegates to the active backend after validating the line number.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public tokenizeIfCheap(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().tokenizeIfCheap(lineNumber);
	}

	/** Delegates to the active backend; the arguments are passed through without validation here. */
	public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
		return this.tokens.get().getTokenTypeIfInsertingCharacter(lineNumber, column, character);
	}

	/** Delegates to the active backend; the arguments are passed through without validation here. */
	public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
		return this.tokens.get().tokenizeLinesAt(lineNumber, lines);
	}

	// #endregion

	// #region Semantic Tokens

	/**
	 * Replaces the semantic tokens and reports the whole document as changed.
	 *
	 * @param tokens The new semantic tokens; `null` is reported as `semanticTokensApplied: false`.
	 * @param isComplete Passed on to the semantic token store.
	 */
	public setSemanticTokens(tokens: SparseMultilineTokens[] | null, isComplete: boolean): void {
		this._semanticTokens.set(tokens, isComplete, this._textModel);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: tokens !== null,
			ranges: [{ fromLineNumber: 1, toLineNumber: this._textModel.getLineCount() }],
		});
	}

	/** @returns whether the semantic token store reports itself as complete. */
	public hasCompleteSemanticTokens(): boolean {
		return this._semanticTokens.isComplete();
	}

	/** @returns whether the semantic token store is non-empty. */
	public hasSomeSemanticTokens(): boolean {
		return !this._semanticTokens.isEmpty();
	}

	/**
	 * Sets semantic tokens for `range` and reports the affected lines as changed.
	 * Does nothing when complete semantic tokens are already present.
	 */
	public setPartialSemanticTokens(range: Range, tokens: SparseMultilineTokens[]): void {
		if (this.hasCompleteSemanticTokens()) {
			return;
		}
		// The range returned by the store is validated against the model before being reported.
		const changedRange = this._textModel.validateRange(
			this._semanticTokens.setPartial(range, tokens)
		);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: true,
			ranges: [
				{
					fromLineNumber: changedRange.startLineNumber,
					toLineNumber: changedRange.endLineNumber,
				},
			],
		});
	}

	// #endregion

	// #region Utility Methods

	/**
	 * Finds the word touching `_position`, using the word definition of the language at that position.
	 *
	 * The token at the position is tried first; if the position sits exactly at the start of a
	 * language run, the previous token's language is tried as well.
	 *
	 * @returns the word, or `null` if no word touches the originally passed-in column.
	 */
	public getWordAtPosition(_position: IPosition): IWordAtPosition | null {
		this.assertNotDisposed();

		const position = this._textModel.validatePosition(_position);
		const lineContent = this._textModel.getLineContent(position.lineNumber);
		const lineTokens = this.getLineTokens(position.lineNumber);
		const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);

		// (1). First try checking right biased word
		const [rbStartOffset, rbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(lineTokens, tokenIndex);
		const rightBiasedWord = getWordAtText(
			position.column,
			this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex)).getWordDefinition(),
			lineContent.substring(rbStartOffset, rbEndOffset),
			rbStartOffset
		);
		// Make sure the result touches the original passed in position
		if (
			rightBiasedWord &&
			rightBiasedWord.startColumn <= _position.column &&
			_position.column <= rightBiasedWord.endColumn
		) {
			return rightBiasedWord;
		}

		// (2). Else, if we were at a language boundary, check the left biased word
		if (tokenIndex > 0 && rbStartOffset === position.column - 1) {
			// edge case, where `position` sits between two tokens belonging to two different languages
			const [lbStartOffset, lbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(
				lineTokens,
				tokenIndex - 1
			);
			const leftBiasedWord = getWordAtText(
				position.column,
				this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex - 1)).getWordDefinition(),
				lineContent.substring(lbStartOffset, lbEndOffset),
				lbStartOffset
			);
			// Make sure the result touches the original passed in position
			if (
				leftBiasedWord &&
				leftBiasedWord.startColumn <= _position.column &&
				_position.column <= leftBiasedWord.endColumn
			) {
				return leftBiasedWord;
			}
		}

		return null;
	}

	/** Looks up the resolved language configuration for `languageId`. */
	private getLanguageConfiguration(languageId: string): ResolvedLanguageConfiguration {
		return this._languageConfigurationService.getLanguageConfiguration(languageId);
	}

	/**
	 * Computes the offsets of the contiguous run of tokens around `tokenIndex` that share its language id.
	 *
	 * @returns `[startOffset, endOffset]` of that run within the line.
	 */
	private static _findLanguageBoundaries(lineTokens: LineTokens, tokenIndex: number): [number, number] {
		const languageId = lineTokens.getLanguageId(tokenIndex);

		// go left until a different language is hit
		let startOffset = 0;
		for (let i = tokenIndex; i >= 0 && lineTokens.getLanguageId(i) === languageId; i--) {
			startOffset = lineTokens.getStartOffset(i);
		}

		// go right until a different language is hit
		let endOffset = lineTokens.getLineContent().length;
		for (
			let i = tokenIndex, tokenCount = lineTokens.getCount();
			i < tokenCount && lineTokens.getLanguageId(i) === languageId;
			i++
		) {
			endOffset = lineTokens.getEndOffset(i);
		}

		return [startOffset, endOffset];
	}

	/**
	 * Returns the part of the word at `position` that lies before `position.column`.
	 *
	 * @returns the truncated word ending at `position.column`, or an empty word located at
	 * `position.column` when no word touches the position.
	 */
	public getWordUntilPosition(position: IPosition): IWordAtPosition {
		const wordAtPosition = this.getWordAtPosition(position);
		if (!wordAtPosition) {
			return { word: '', startColumn: position.column, endColumn: position.column };
		}
		return {
			// `substring(0, n)` replaces the deprecated `substr(0, n)`; with a start index of 0
			// both yield the same result for every `n`.
			word: wordAtPosition.word.substring(0, position.column - wordAtPosition.startColumn),
			startColumn: wordAtPosition.startColumn,
			endColumn: position.column,
		};
	}

	// #endregion

	// #region Language Id handling

	/** @returns the model's current language id. */
	public getLanguageId(): string {
		return this._languageId;
	}

	/**
	 * @returns the language id of the token at the given position (the position is validated first).
	 */
	public getLanguageIdAtPosition(lineNumber: number, column: number): string {
		const position = this._textModel.validatePosition(new Position(lineNumber, column));
		const lineTokens = this.getLineTokens(position.lineNumber);
		return lineTokens.getLanguageId(lineTokens.findTokenIndexAtOffset(position.column - 1));
	}

	/**
	 * Changes the model's language id. Does nothing if `languageId` equals the current one.
	 *
	 * @param languageId The new language id.
	 * @param source Reported as `source` in the language changed event; defaults to `'api'`.
	 */
	public setLanguageId(languageId: string, source: string = 'api'): void {
		if (this._languageId === languageId) {
			// There's nothing to do
			return;
		}

		const e: IModelLanguageChangedEvent = {
			oldLanguage: this._languageId,
			newLanguage: languageId,
			source
		};

		// Update both the plain field and its observable mirror before notifying anyone.
		this._languageId = languageId;
		this._languageIdObs.set(languageId, undefined);
		this._bracketPairsTextModelPart.handleDidChangeLanguage(e);

		this._onDidChangeLanguage.fire(e);
		this._onDidChangeLanguageConfiguration.fire({});
	}

	// #endregion
}
390
391