Path: blob/main/src/vs/editor/common/model/tokens/tokenizationTextModelPart.ts
3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { CharCode } from '../../../../base/common/charCode.js';
import { BugIndicatingError } from '../../../../base/common/errors.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { countEOL } from '../../core/misc/eolCounter.js';
import { IPosition, Position } from '../../core/position.js';
import { Range } from '../../core/range.js';
import { IWordAtPosition, getWordAtText } from '../../core/wordHelper.js';
import { StandardTokenType } from '../../encodedTokenAttributes.js';
import { ILanguageService } from '../../languages/language.js';
import { ILanguageConfigurationService, LanguageConfigurationServiceChangeEvent, ResolvedLanguageConfiguration } from '../../languages/languageConfigurationRegistry.js';
import { BracketPairsTextModelPart } from '../bracketPairsTextModelPart/bracketPairsImpl.js';
import { TextModel } from '../textModel.js';
import { TextModelPart } from '../textModelPart.js';
import { AbstractSyntaxTokenBackend, AttachedViews } from './abstractSyntaxTokenBackend.js';
import { TreeSitterSyntaxTokenBackend } from './treeSitter/treeSitterSyntaxTokenBackend.js';
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent } from '../../textModelEvents.js';
import { ITokenizationTextModelPart } from '../../tokenizationTextModelPart.js';
import { LineTokens } from '../../tokens/lineTokens.js';
import { SparseMultilineTokens } from '../../tokens/sparseMultilineTokens.js';
import { SparseTokensStore } from '../../tokens/sparseTokensStore.js';
import { IInstantiationService } from '../../../../platform/instantiation/common/instantiation.js';
import { TokenizerSyntaxTokenBackend } from './tokenizerSyntaxTokenBackend.js';
import { ITreeSitterLibraryService } from '../../services/treeSitter/treeSitterLibraryService.js';
import { derived, IObservable, ISettableObservable, observableValue } from '../../../../base/common/observable.js';

/**
 * Part of a text model that owns its tokens and its language id.
 *
 * It combines two token sources:
 * - syntax ("grammar") tokens, produced by the backend currently held in {@link tokens}
 *   (either a tree-sitter based or a tokenizer based backend), and
 * - semantic tokens, kept in a sparse store and layered on top of the syntax tokens
 *   in {@link getLineTokens}.
 *
 * It also re-emits token changes and fires language / language configuration change events.
 */
export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
	/** Semantic tokens; merged over the syntax tokens when line tokens are requested. */
	private readonly _semanticTokens: SparseTokensStore;

	private readonly _onDidChangeLanguage: Emitter<IModelLanguageChangedEvent>;
	public readonly onDidChangeLanguage: Event<IModelLanguageChangedEvent>;

	private readonly _onDidChangeLanguageConfiguration: Emitter<IModelLanguageConfigurationChangedEvent>;
	public readonly onDidChangeLanguageConfiguration: Event<IModelLanguageConfigurationChangedEvent>;

	private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent>;
	public readonly onDidChangeTokens: Event<IModelTokensChangedEvent>;

	/** The active syntax token backend. Re-derived whenever `_useTreeSitter` changes. */
	public readonly tokens: IObservable<AbstractSyntaxTokenBackend>;
	/** Whether the tree-sitter library service reports support for the current language id. */
	private readonly _useTreeSitter: IObservable<boolean>;
	/** Observable mirror of `_languageId`; both are updated together in {@link setLanguageId}. */
	private readonly _languageIdObs: ISettableObservable<string>;

	constructor(
		private readonly _textModel: TextModel,
		private readonly _bracketPairsTextModelPart: BracketPairsTextModelPart,
		private _languageId: string,
		private readonly _attachedViews: AttachedViews,
		@ILanguageService private readonly _languageService: ILanguageService,
		@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@ITreeSitterLibraryService private readonly _treeSitterLibraryService: ITreeSitterLibraryService,
	) {
		super();

		this._languageIdObs = observableValue(this, this._languageId);

		this._useTreeSitter = derived(this, reader => {
			const languageId = this._languageIdObs.read(reader);
			return this._treeSitterLibraryService.supportsLanguage(languageId, reader);
		});

		this.tokens = derived(this, reader => {
			// The backend and its listeners are added to `reader.store`.
			// NOTE(review): presumably that store is disposed when this derived recomputes — confirm.
			let tokens: AbstractSyntaxTokenBackend;
			if (this._useTreeSitter.read(reader)) {
				tokens = reader.store.add(this._instantiationService.createInstance(
					TreeSitterSyntaxTokenBackend,
					this._languageIdObs,
					this._languageService.languageIdCodec,
					this._textModel,
					this._attachedViews.visibleLineRanges
				));
			} else {
				tokens = reader.store.add(new TokenizerSyntaxTokenBackend(this._languageService.languageIdCodec, this._textModel, () => this._languageId, this._attachedViews));
			}

			// Forward backend token changes through this part's own event (and to the bracket pairs part).
			reader.store.add(tokens.onDidChangeTokens(e => {
				this._emitModelTokensChangedEvent(e);
			}));

			reader.store.add(tokens.onDidChangeBackgroundTokenizationState(() => {
				this._bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
			}));
			return tokens;
		});

		// Skip the reset for the very first backend; only replacement backends are reset.
		let hadTokens = false;
		this.tokens.recomputeInitiallyAndOnChange(this._store, value => {
			if (hadTokens) {
				// We need to reset the tokenization, as the new token provider otherwise won't have a chance to provide tokens until some action happens in the editor.
				// TODO@hediet: Look into why this is needed.
				value.todo_resetTokenization();
			}
			hadTokens = true;
		});

		this._semanticTokens = new SparseTokensStore(this._languageService.languageIdCodec);
		this._onDidChangeLanguage = this._register(new Emitter<IModelLanguageChangedEvent>());
		this.onDidChangeLanguage = this._onDidChangeLanguage.event;
		this._onDidChangeLanguageConfiguration = this._register(new Emitter<IModelLanguageConfigurationChangedEvent>());
		this.onDidChangeLanguageConfiguration = this._onDidChangeLanguageConfiguration.event;
		this._onDidChangeTokens = this._register(new Emitter<IModelTokensChangedEvent>());
		this.onDidChangeTokens = this._onDidChangeTokens.event;
	}

	/**
	 * @returns `true` if any of the language, language configuration or token change emitters has listeners.
	 */
	_hasListeners(): boolean {
		return (this._onDidChangeLanguage.hasListeners()
			|| this._onDidChangeLanguageConfiguration.hasListeners()
			|| this._onDidChangeTokens.hasListeners());
	}

	/**
	 * Fires `onDidChangeLanguageConfiguration` if the service change affects this part's language id.
	 */
	public handleLanguageConfigurationServiceChange(e: LanguageConfigurationServiceChangeEvent): void {
		if (e.affects(this._languageId)) {
			this._onDidChangeLanguageConfiguration.fire({});
		}
	}

	/**
	 * Applies a content change to the semantic token store and then forwards it to the syntax token backend.
	 */
	public handleDidChangeContent(e: IModelContentChangedEvent): void {
		if (e.isFlush) {
			this._semanticTokens.flush();
		} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
			for (const c of e.changes) {
				const [eolCount, firstLineLength, lastLineLength] = countEOL(c.text);

				this._semanticTokens.acceptEdit(
					c.range,
					eolCount,
					firstLineLength,
					lastLineLength,
					c.text.length > 0 ? c.text.charCodeAt(0) : CharCode.Null
				);
			}
		}

		this.tokens.get().handleDidChangeContent(e);
	}

	/** Forwards the "attached" change notification to the current syntax token backend. */
	public handleDidChangeAttached(): void {
		this.tokens.get().handleDidChangeAttached();
	}

	/**
	 * Includes grammar and semantic tokens.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public getLineTokens(lineNumber: number): LineTokens {
		this.validateLineNumber(lineNumber);
		const syntacticTokens = this.tokens.get().getLineTokens(lineNumber);
		return this._semanticTokens.addSparseTokens(lineNumber, syntacticTokens);
	}

	/**
	 * Notifies the bracket pairs part and fires `onDidChangeTokens`, unless the text model is disposing.
	 */
	private _emitModelTokensChangedEvent(e: IModelTokensChangedEvent): void {
		if (!this._textModel._isDisposing()) {
			this._bracketPairsTextModelPart.handleDidChangeTokens(e);
			this._onDidChangeTokens.fire(e);
		}
	}

	// #region Grammar Tokens

	/**
	 * @throws BugIndicatingError if `lineNumber` is less than 1 or greater than the model's line count.
	 */
	private validateLineNumber(lineNumber: number): void {
		if (lineNumber < 1 || lineNumber > this._textModel.getLineCount()) {
			throw new BugIndicatingError('Illegal value for lineNumber');
		}
	}

	public get hasTokens(): boolean {
		return this.tokens.get().hasTokens;
	}

	/** Asks the current syntax token backend to reset its tokenization. */
	public resetTokenization() {
		this.tokens.get().todo_resetTokenization();
	}

	public get backgroundTokenizationState() {
		return this.tokens.get().backgroundTokenizationState;
	}

	/** Validates `lineNumber`, then delegates to the current syntax token backend. */
	public forceTokenization(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().forceTokenization(lineNumber);
	}

	/** Validates `lineNumber`, then delegates to the current syntax token backend. */
	public hasAccurateTokensForLine(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().hasAccurateTokensForLine(lineNumber);
	}

	/** Validates `lineNumber`, then delegates to the current syntax token backend. */
	public isCheapToTokenize(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().isCheapToTokenize(lineNumber);
	}

	/** Validates `lineNumber`, then delegates to the current syntax token backend. */
	public tokenizeIfCheap(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().tokenizeIfCheap(lineNumber);
	}

	/** Delegates to the current syntax token backend; the line number is not validated here. */
	public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
		return this.tokens.get().getTokenTypeIfInsertingCharacter(lineNumber, column, character);
	}

	/** Delegates to the current syntax token backend; the line number is not validated here. */
	public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
		return this.tokens.get().tokenizeLinesAt(lineNumber, lines);
	}

	// #endregion

	// #region Semantic Tokens

	/**
	 * Replaces the semantic tokens and emits a tokens-changed event covering every line of the model.
	 *
	 * @param tokens The new semantic tokens, or `null`; `semanticTokensApplied` in the event is `tokens !== null`.
	 * @param isComplete Passed through to the semantic token store.
	 */
	public setSemanticTokens(tokens: SparseMultilineTokens[] | null, isComplete: boolean): void {
		this._semanticTokens.set(tokens, isComplete, this._textModel);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: tokens !== null,
			ranges: [{ fromLineNumber: 1, toLineNumber: this._textModel.getLineCount() }],
		});
	}

	public hasCompleteSemanticTokens(): boolean {
		return this._semanticTokens.isComplete();
	}

	public hasSomeSemanticTokens(): boolean {
		return !this._semanticTokens.isEmpty();
	}

	/**
	 * Sets semantic tokens for `range` and emits a tokens-changed event for the (validated) changed range.
	 * Does nothing when the store already holds complete semantic tokens.
	 */
	public setPartialSemanticTokens(range: Range, tokens: SparseMultilineTokens[]): void {
		if (this.hasCompleteSemanticTokens()) {
			return;
		}
		const changedRange = this._textModel.validateRange(
			this._semanticTokens.setPartial(range, tokens)
		);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: true,
			ranges: [
				{
					fromLineNumber: changedRange.startLineNumber,
					toLineNumber: changedRange.endLineNumber,
				},
			],
		});
	}

	// #endregion

	// #region Utility Methods

	/**
	 * Finds the word touching `_position`, using the word definition of the language at that position.
	 *
	 * The word is searched only inside the run of same-language tokens around the position. If the
	 * position sits exactly on a boundary between two languages, the run to the left is tried as well.
	 *
	 * @returns The word, or `null` if no word touches the position.
	 */
	public getWordAtPosition(_position: IPosition): IWordAtPosition | null {
		this.assertNotDisposed();

		const position = this._textModel.validatePosition(_position);
		const lineContent = this._textModel.getLineContent(position.lineNumber);
		const lineTokens = this.getLineTokens(position.lineNumber);
		const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);

		// (1). First try checking right biased word
		const [rbStartOffset, rbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(lineTokens, tokenIndex);
		const rightBiasedWord = getWordAtText(
			position.column,
			this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex)).getWordDefinition(),
			lineContent.substring(rbStartOffset, rbEndOffset),
			rbStartOffset
		);
		// Make sure the result touches the original passed in position
		if (
			rightBiasedWord &&
			rightBiasedWord.startColumn <= _position.column &&
			_position.column <= rightBiasedWord.endColumn
		) {
			return rightBiasedWord;
		}

		// (2). Else, if we were at a language boundary, check the left biased word
		if (tokenIndex > 0 && rbStartOffset === position.column - 1) {
			// edge case, where `position` sits between two tokens belonging to two different languages
			const [lbStartOffset, lbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(
				lineTokens,
				tokenIndex - 1
			);
			const leftBiasedWord = getWordAtText(
				position.column,
				this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex - 1)).getWordDefinition(),
				lineContent.substring(lbStartOffset, lbEndOffset),
				lbStartOffset
			);
			// Make sure the result touches the original passed in position
			if (
				leftBiasedWord &&
				leftBiasedWord.startColumn <= _position.column &&
				_position.column <= leftBiasedWord.endColumn
			) {
				return leftBiasedWord;
			}
		}

		return null;
	}

	private getLanguageConfiguration(languageId: string): ResolvedLanguageConfiguration {
		return this._languageConfigurationService.getLanguageConfiguration(languageId);
	}

	/**
	 * Computes the `[startOffset, endOffset]` of the contiguous run of tokens around `tokenIndex`
	 * that share the language id of the token at `tokenIndex`.
	 */
	private static _findLanguageBoundaries(lineTokens: LineTokens, tokenIndex: number): [number, number] {
		const languageId = lineTokens.getLanguageId(tokenIndex);

		// go left until a different language is hit
		let startOffset = 0;
		for (let i = tokenIndex; i >= 0 && lineTokens.getLanguageId(i) === languageId; i--) {
			startOffset = lineTokens.getStartOffset(i);
		}

		// go right until a different language is hit
		let endOffset = lineTokens.getLineContent().length;
		for (
			let i = tokenIndex, tokenCount = lineTokens.getCount();
			i < tokenCount && lineTokens.getLanguageId(i) === languageId;
			i++
		) {
			endOffset = lineTokens.getEndOffset(i);
		}

		return [startOffset, endOffset];
	}

	/**
	 * Returns the part of the word at `position` that lies before `position.column`.
	 *
	 * @returns The truncated word ending at `position.column`, or an empty word located at
	 * `position.column` when no word touches the position.
	 */
	public getWordUntilPosition(position: IPosition): IWordAtPosition {
		const wordAtPosition = this.getWordAtPosition(position);
		if (!wordAtPosition) {
			return { word: '', startColumn: position.column, endColumn: position.column, };
		}
		return {
			// `getWordAtPosition` only returns words with `startColumn <= position.column`, so the
			// end index is never negative and `substring` matches the deprecated `substr(0, length)`.
			word: wordAtPosition.word.substring(0, position.column - wordAtPosition.startColumn),
			startColumn: wordAtPosition.startColumn,
			endColumn: position.column,
		};
	}

	// #endregion

	// #region Language Id handling

	public getLanguageId(): string {
		return this._languageId;
	}

	/**
	 * Returns the language id of the token at the given (validated) position, which may differ
	 * from {@link getLanguageId} when a line contains tokens of several languages.
	 */
	public getLanguageIdAtPosition(lineNumber: number, column: number): string {
		const position = this._textModel.validatePosition(new Position(lineNumber, column));
		const lineTokens = this.getLineTokens(position.lineNumber);
		return lineTokens.getLanguageId(lineTokens.findTokenIndexAtOffset(position.column - 1));
	}

	/**
	 * Changes the language id. Does nothing if it is unchanged; otherwise updates the language id
	 * (plain field and observable), notifies the bracket pairs part, and fires
	 * `onDidChangeLanguage` followed by `onDidChangeLanguageConfiguration`.
	 *
	 * @param languageId The new language id.
	 * @param source Reported as `source` in the language changed event; defaults to `'api'`.
	 */
	public setLanguageId(languageId: string, source: string = 'api'): void {
		if (this._languageId === languageId) {
			// There's nothing to do
			return;
		}

		const e: IModelLanguageChangedEvent = {
			oldLanguage: this._languageId,
			newLanguage: languageId,
			source
		};

		this._languageId = languageId;
		this._languageIdObs.set(languageId, undefined);
		this._bracketPairsTextModelPart.handleDidChangeLanguage(e);

		this._onDidChangeLanguage.fire(e);
		this._onDidChangeLanguageConfiguration.fire({});
	}

	// #endregion
}