Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/model/tokens/tokenizationTextModelPart.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../../base/common/charCode.js';
7
import { BugIndicatingError } from '../../../../base/common/errors.js';
8
import { Emitter, Event } from '../../../../base/common/event.js';
9
import { countEOL } from '../../core/misc/eolCounter.js';
10
import { IPosition, Position } from '../../core/position.js';
11
import { Range } from '../../core/range.js';
12
import { IWordAtPosition, getWordAtText } from '../../core/wordHelper.js';
13
import { StandardTokenType } from '../../encodedTokenAttributes.js';
14
import { ILanguageService } from '../../languages/language.js';
15
import { ILanguageConfigurationService, LanguageConfigurationServiceChangeEvent, ResolvedLanguageConfiguration } from '../../languages/languageConfigurationRegistry.js';
16
import { BracketPairsTextModelPart } from '../bracketPairsTextModelPart/bracketPairsImpl.js';
17
import { TextModel } from '../textModel.js';
18
import { TextModelPart } from '../textModelPart.js';
19
import { AbstractSyntaxTokenBackend, AttachedViews } from './abstractSyntaxTokenBackend.js';
20
import { TreeSitterSyntaxTokenBackend } from './treeSitter/treeSitterSyntaxTokenBackend.js';
21
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent } from '../../textModelEvents.js';
22
import { ITokenizationTextModelPart } from '../../tokenizationTextModelPart.js';
23
import { LineTokens } from '../../tokens/lineTokens.js';
24
import { SparseMultilineTokens } from '../../tokens/sparseMultilineTokens.js';
25
import { SparseTokensStore } from '../../tokens/sparseTokensStore.js';
26
import { IInstantiationService } from '../../../../platform/instantiation/common/instantiation.js';
27
import { TokenizerSyntaxTokenBackend } from './tokenizerSyntaxTokenBackend.js';
28
import { ITreeSitterLibraryService } from '../../services/treeSitter/treeSitterLibraryService.js';
29
import { derived, IObservable, ISettableObservable, observableValue } from '../../../../base/common/observable.js';
30
31
/**
 * Text-model part responsible for the tokens of a `TextModel`.
 *
 * It picks a syntax token backend (tree-sitter based or tokenizer based) depending on
 * whether the tree-sitter library service supports the current language id, overlays
 * semantic tokens on top of the syntactic ones, and owns the model's language id together
 * with the related change events.
 */
export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
	/** Semantic tokens; merged into the syntactic tokens in `getLineTokens`. */
	private readonly _semanticTokens: SparseTokensStore;

	private readonly _onDidChangeLanguage: Emitter<IModelLanguageChangedEvent>;
	/** Fired by `setLanguageId` when the language id actually changes. */
	public readonly onDidChangeLanguage: Event<IModelLanguageChangedEvent>;

	private readonly _onDidChangeLanguageConfiguration: Emitter<IModelLanguageConfigurationChangedEvent>;
	/** Fired when the language changes or when a configuration change affects the current language. */
	public readonly onDidChangeLanguageConfiguration: Event<IModelLanguageConfigurationChangedEvent>;

	private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent>;
	/** Fired for syntactic token changes reported by the backend and for semantic token updates. */
	public readonly onDidChangeTokens: Event<IModelTokensChangedEvent>;

	/** The currently active syntax token backend. */
	public readonly tokens: IObservable<AbstractSyntaxTokenBackend>;
	/** Whether the tree-sitter library service reports support for the current language id. */
	private readonly _useTreeSitter: IObservable<boolean>;
	/** Observable mirror of `_languageId`; both are updated together in `setLanguageId`. */
	private readonly _languageIdObs: ISettableObservable<string>;

	constructor(
		private readonly _textModel: TextModel,
		private readonly _bracketPairsTextModelPart: BracketPairsTextModelPart,
		private _languageId: string,
		private readonly _attachedViews: AttachedViews,
		@ILanguageService private readonly _languageService: ILanguageService,
		@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@ITreeSitterLibraryService private readonly _treeSitterLibraryService: ITreeSitterLibraryService,
	) {
		super();

		this._languageIdObs = observableValue(this, this._languageId);

		this._useTreeSitter = derived(this, reader => {
			const languageId = this._languageIdObs.read(reader);
			return this._treeSitterLibraryService.supportsLanguage(languageId, reader);
		});

		this.tokens = derived(this, reader => {
			let tokens: AbstractSyntaxTokenBackend;
			if (this._useTreeSitter.read(reader)) {
				tokens = reader.store.add(this._instantiationService.createInstance(
					TreeSitterSyntaxTokenBackend,
					this._languageIdObs,
					this._languageService.languageIdCodec,
					this._textModel,
					this._attachedViews.visibleLineRanges
				));
			} else {
				tokens = reader.store.add(new TokenizerSyntaxTokenBackend(this._languageService.languageIdCodec, this._textModel, () => this._languageId, this._attachedViews));
			}

			// The backend and both listeners are added to the reader's store, so they share
			// the lifetime of this particular computation of the derived.
			reader.store.add(tokens.onDidChangeTokens(e => {
				this._emitModelTokensChangedEvent(e);
			}));

			reader.store.add(tokens.onDidChangeBackgroundTokenizationState(() => {
				this._bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
			}));
			return tokens;
		});

		let hadTokens = false;
		this.tokens.recomputeInitiallyAndOnChange(this._store, value => {
			if (hadTokens) {
				// We need to reset the tokenization, as the new token provider otherwise won't have a chance to provide tokens until some action happens in the editor.
				// TODO@hediet: Look into why this is needed.
				value.todo_resetTokenization();
			}
			hadTokens = true;
		});

		this._semanticTokens = new SparseTokensStore(this._languageService.languageIdCodec);
		this._onDidChangeLanguage = this._register(new Emitter<IModelLanguageChangedEvent>());
		this.onDidChangeLanguage = this._onDidChangeLanguage.event;
		this._onDidChangeLanguageConfiguration = this._register(new Emitter<IModelLanguageConfigurationChangedEvent>());
		this.onDidChangeLanguageConfiguration = this._onDidChangeLanguageConfiguration.event;
		this._onDidChangeTokens = this._register(new Emitter<IModelTokensChangedEvent>());
		this.onDidChangeTokens = this._onDidChangeTokens.event;
	}

	/**
	 * @returns `true` if at least one of the language, language-configuration or
	 * tokens emitters currently has listeners.
	 */
	_hasListeners(): boolean {
		return (this._onDidChangeLanguage.hasListeners()
			|| this._onDidChangeLanguageConfiguration.hasListeners()
			|| this._onDidChangeTokens.hasListeners());
	}

	/**
	 * Forwards a language configuration service change as a model-level event,
	 * but only if the change affects this model's current language id.
	 */
	public handleLanguageConfigurationServiceChange(e: LanguageConfigurationServiceChangeEvent): void {
		if (e.affects(this._languageId)) {
			this._onDidChangeLanguageConfiguration.fire({});
		}
	}

	/**
	 * Applies a content change to the semantic token store and then hands the
	 * event to the active syntax token backend.
	 */
	public handleDidChangeContent(e: IModelContentChangedEvent): void {
		if (e.isFlush) {
			this._semanticTokens.flush();
		} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
			for (const c of e.changes) {
				const [eolCount, firstLineLength, lastLineLength] = countEOL(c.text);

				this._semanticTokens.acceptEdit(
					c.range,
					eolCount,
					firstLineLength,
					lastLineLength,
					// First char code of the inserted text, or Null for a pure deletion.
					c.text.length > 0 ? c.text.charCodeAt(0) : CharCode.Null
				);
			}
		}

		// The backend is notified for every change, including flushes and EOL changes.
		this.tokens.get().handleDidChangeContent(e);
	}

	/** Notifies the active syntax token backend that the attached state changed. */
	public handleDidChangeAttached(): void {
		this.tokens.get().handleDidChangeAttached();
	}

	/**
	 * Includes grammar and semantic tokens.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public getLineTokens(lineNumber: number): LineTokens {
		this.validateLineNumber(lineNumber);
		const syntacticTokens = this.tokens.get().getLineTokens(lineNumber);
		return this._semanticTokens.addSparseTokens(lineNumber, syntacticTokens);
	}

	/**
	 * Notifies the bracket pairs part and then fires `onDidChangeTokens`.
	 * Nothing happens while the text model is disposing.
	 */
	private _emitModelTokensChangedEvent(e: IModelTokensChangedEvent): void {
		if (!this._textModel._isDisposing()) {
			this._bracketPairsTextModelPart.handleDidChangeTokens(e);
			this._onDidChangeTokens.fire(e);
		}
	}

	// #region Grammar Tokens

	/**
	 * @throws BugIndicatingError if `lineNumber` is smaller than 1 or larger than
	 * the text model's line count.
	 */
	private validateLineNumber(lineNumber: number): void {
		if (lineNumber < 1 || lineNumber > this._textModel.getLineCount()) {
			throw new BugIndicatingError('Illegal value for lineNumber');
		}
	}

	/** Delegates to the active syntax token backend. */
	public get hasTokens(): boolean {
		return this.tokens.get().hasTokens;
	}

	/** Asks the active syntax token backend to reset its tokenization. */
	public resetTokenization(): void {
		this.tokens.get().todo_resetTokenization();
	}

	/** Background tokenization state of the active syntax token backend. */
	public get backgroundTokenizationState() {
		return this.tokens.get().backgroundTokenizationState;
	}

	/**
	 * Validates `lineNumber` and delegates to the active syntax token backend.
	 *
	 * @throws BugIndicatingError if `lineNumber` is out of range.
	 */
	public forceTokenization(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().forceTokenization(lineNumber);
	}

	/**
	 * Validates `lineNumber` and delegates to the active syntax token backend.
	 *
	 * @throws BugIndicatingError if `lineNumber` is out of range.
	 */
	public hasAccurateTokensForLine(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().hasAccurateTokensForLine(lineNumber);
	}

	/**
	 * Validates `lineNumber` and delegates to the active syntax token backend.
	 *
	 * @throws BugIndicatingError if `lineNumber` is out of range.
	 */
	public isCheapToTokenize(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().isCheapToTokenize(lineNumber);
	}

	/**
	 * Validates `lineNumber` and delegates to the active syntax token backend.
	 *
	 * @throws BugIndicatingError if `lineNumber` is out of range.
	 */
	public tokenizeIfCheap(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().tokenizeIfCheap(lineNumber);
	}

	/** Delegates to the active syntax token backend; arguments are passed through unvalidated. */
	public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
		return this.tokens.get().getTokenTypeIfInsertingCharacter(lineNumber, column, character);
	}

	/** Delegates to the active syntax token backend; arguments are passed through unvalidated. */
	public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
		return this.tokens.get().tokenizeLinesAt(lineNumber, lines);
	}

	// #endregion

	// #region Semantic Tokens

	/**
	 * Replaces the semantic tokens and emits a tokens-changed event that spans
	 * every line of the model.
	 *
	 * @param tokens The new semantic tokens; `null` is reported as
	 * `semanticTokensApplied: false` in the emitted event.
	 * @param isComplete Passed through to the semantic token store.
	 */
	public setSemanticTokens(tokens: SparseMultilineTokens[] | null, isComplete: boolean): void {
		this._semanticTokens.set(tokens, isComplete, this._textModel);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: tokens !== null,
			ranges: [{ fromLineNumber: 1, toLineNumber: this._textModel.getLineCount() }],
		});
	}

	/** @returns Whether the semantic token store reports itself as complete. */
	public hasCompleteSemanticTokens(): boolean {
		return this._semanticTokens.isComplete();
	}

	/** @returns Whether the semantic token store is non-empty. */
	public hasSomeSemanticTokens(): boolean {
		return !this._semanticTokens.isEmpty();
	}

	/**
	 * Sets semantic tokens for `range` and emits a tokens-changed event for the
	 * (validated) range the store reports as changed.
	 * Does nothing when complete semantic tokens are already present.
	 */
	public setPartialSemanticTokens(range: Range, tokens: SparseMultilineTokens[]): void {
		if (this.hasCompleteSemanticTokens()) {
			return;
		}
		const changedRange = this._textModel.validateRange(
			this._semanticTokens.setPartial(range, tokens)
		);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: true,
			ranges: [
				{
					fromLineNumber: changedRange.startLineNumber,
					toLineNumber: changedRange.endLineNumber,
				},
			],
		});
	}

	// #endregion

	// #region Utility Methods

	/**
	 * Finds the word that touches `_position`, using the word definition of the
	 * language the position's token belongs to.
	 *
	 * The token at the position is tried first. If the position sits exactly at the
	 * start of a language run that is preceded by other tokens, the preceding token's
	 * language is tried as well.
	 *
	 * @returns The word, or `null` if no word touches the position.
	 */
	public getWordAtPosition(_position: IPosition): IWordAtPosition | null {
		this.assertNotDisposed();

		const position = this._textModel.validatePosition(_position);
		const lineContent = this._textModel.getLineContent(position.lineNumber);
		const lineTokens = this.getLineTokens(position.lineNumber);
		const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);

		// (1). First try checking right biased word
		const [rbStartOffset, rbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(lineTokens, tokenIndex);
		const rightBiasedWord = getWordAtText(
			position.column,
			this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex)).getWordDefinition(),
			lineContent.substring(rbStartOffset, rbEndOffset),
			rbStartOffset
		);
		// Make sure the result touches the original passed in position
		if (
			rightBiasedWord &&
			rightBiasedWord.startColumn <= _position.column &&
			_position.column <= rightBiasedWord.endColumn
		) {
			return rightBiasedWord;
		}

		// (2). Else, if we were at a language boundary, check the left biased word
		if (tokenIndex > 0 && rbStartOffset === position.column - 1) {
			// edge case, where `position` sits between two tokens belonging to two different languages
			const [lbStartOffset, lbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(
				lineTokens,
				tokenIndex - 1
			);
			const leftBiasedWord = getWordAtText(
				position.column,
				this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex - 1)).getWordDefinition(),
				lineContent.substring(lbStartOffset, lbEndOffset),
				lbStartOffset
			);
			// Make sure the result touches the original passed in position
			if (
				leftBiasedWord &&
				leftBiasedWord.startColumn <= _position.column &&
				_position.column <= leftBiasedWord.endColumn
			) {
				return leftBiasedWord;
			}
		}

		return null;
	}

	/** Looks up the resolved language configuration for `languageId`. */
	private getLanguageConfiguration(languageId: string): ResolvedLanguageConfiguration {
		return this._languageConfigurationService.getLanguageConfiguration(languageId);
	}

	/**
	 * Computes the offsets of the contiguous run of tokens around `tokenIndex`
	 * that share the language id of the token at `tokenIndex`.
	 *
	 * @returns `[startOffset, endOffset]` of that run.
	 */
	private static _findLanguageBoundaries(lineTokens: LineTokens, tokenIndex: number): [number, number] {
		const languageId = lineTokens.getLanguageId(tokenIndex);

		// go left until a different language is hit
		let startOffset = 0;
		for (let i = tokenIndex; i >= 0 && lineTokens.getLanguageId(i) === languageId; i--) {
			startOffset = lineTokens.getStartOffset(i);
		}

		// go right until a different language is hit
		let endOffset = lineTokens.getLineContent().length;
		for (
			let i = tokenIndex, tokenCount = lineTokens.getCount();
			i < tokenCount && lineTokens.getLanguageId(i) === languageId;
			i++
		) {
			endOffset = lineTokens.getEndOffset(i);
		}

		return [startOffset, endOffset];
	}

	/**
	 * Returns the part of the word at `position` that lies before `position.column`.
	 *
	 * @returns The truncated word, or an empty word whose start and end column both
	 * equal `position.column` when no word touches the position.
	 */
	public getWordUntilPosition(position: IPosition): IWordAtPosition {
		const wordAtPosition = this.getWordAtPosition(position);
		if (!wordAtPosition) {
			return { word: '', startColumn: position.column, endColumn: position.column, };
		}
		return {
			// `substring` replaces the deprecated `substr`; with a start of 0 both yield
			// the same prefix of the word.
			word: wordAtPosition.word.substring(0, position.column - wordAtPosition.startColumn),
			startColumn: wordAtPosition.startColumn,
			endColumn: position.column,
		};
	}

	// #endregion

	// #region Language Id handling

	/** @returns The model's current language id. */
	public getLanguageId(): string {
		return this._languageId;
	}

	/**
	 * @returns The language id of the token at the given position, after the
	 * position has been validated against the text model.
	 */
	public getLanguageIdAtPosition(lineNumber: number, column: number): string {
		const position = this._textModel.validatePosition(new Position(lineNumber, column));
		const lineTokens = this.getLineTokens(position.lineNumber);
		return lineTokens.getLanguageId(lineTokens.findTokenIndexAtOffset(position.column - 1));
	}

	/**
	 * Changes the model's language id. Does nothing if the id is unchanged.
	 *
	 * On a change, the stored id and its observable mirror are updated, the bracket
	 * pairs part is notified, and then `onDidChangeLanguage` followed by
	 * `onDidChangeLanguageConfiguration` is fired.
	 *
	 * @param languageId The new language id.
	 * @param source Reported as `source` on the emitted event; defaults to `'api'`.
	 */
	public setLanguageId(languageId: string, source: string = 'api'): void {
		if (this._languageId === languageId) {
			// There's nothing to do
			return;
		}

		const e: IModelLanguageChangedEvent = {
			oldLanguage: this._languageId,
			newLanguage: languageId,
			source
		};

		this._languageId = languageId;
		this._languageIdObs.set(languageId, undefined);
		this._bracketPairsTextModelPart.handleDidChangeLanguage(e);

		this._onDidChangeLanguage.fire(e);
		this._onDidChangeLanguageConfiguration.fire({});
	}

	// #endregion
}
378
379