Path: blob/main/src/vs/editor/common/model/tokens/tokenizationTextModelPart.ts
5240 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { CharCode } from '../../../../base/common/charCode.js';
import { BugIndicatingError } from '../../../../base/common/errors.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { countEOL } from '../../core/misc/eolCounter.js';
import { IPosition, Position } from '../../core/position.js';
import { Range } from '../../core/range.js';
import { IWordAtPosition, getWordAtText } from '../../core/wordHelper.js';
import { StandardTokenType } from '../../encodedTokenAttributes.js';
import { ILanguageService } from '../../languages/language.js';
import { ILanguageConfigurationService, LanguageConfigurationServiceChangeEvent, ResolvedLanguageConfiguration } from '../../languages/languageConfigurationRegistry.js';
import { BracketPairsTextModelPart } from '../bracketPairsTextModelPart/bracketPairsImpl.js';
import { TextModel } from '../textModel.js';
import { TextModelPart } from '../textModelPart.js';
import { AbstractSyntaxTokenBackend, AttachedViews } from './abstractSyntaxTokenBackend.js';
import { TreeSitterSyntaxTokenBackend } from './treeSitter/treeSitterSyntaxTokenBackend.js';
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent, IModelFontTokensChangedEvent } from '../../textModelEvents.js';
import { ITokenizationTextModelPart } from '../../tokenizationTextModelPart.js';
import { LineTokens } from '../../tokens/lineTokens.js';
import { SparseMultilineTokens } from '../../tokens/sparseMultilineTokens.js';
import { SparseTokensStore } from '../../tokens/sparseTokensStore.js';
import { IInstantiationService } from '../../../../platform/instantiation/common/instantiation.js';
import { TokenizerSyntaxTokenBackend } from './tokenizerSyntaxTokenBackend.js';
import { ITreeSitterLibraryService } from '../../services/treeSitter/treeSitterLibraryService.js';
import { derived, IObservable, ISettableObservable, observableValue } from '../../../../base/common/observable.js';

/**
 * Text model part that tracks the model's language id and serves line tokens.
 *
 * Syntax tokens come from the currently selected backend (see `tokens`) and are
 * combined with the semantic tokens kept in `_semanticTokens`.
 */
export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
	/** Semantic tokens layered on top of the syntax tokens in `getLineTokens`. */
	private readonly _semanticTokens: SparseTokensStore;

	private readonly _onDidChangeLanguage: Emitter<IModelLanguageChangedEvent>;
	public readonly onDidChangeLanguage: Event<IModelLanguageChangedEvent>;

	private readonly _onDidChangeLanguageConfiguration: Emitter<IModelLanguageConfigurationChangedEvent>;
	public readonly onDidChangeLanguageConfiguration: Event<IModelLanguageConfigurationChangedEvent>;

	private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent>;
	public readonly onDidChangeTokens: Event<IModelTokensChangedEvent>;

	// Created exactly once, here; the constructor must not create a second emitter.
	private readonly _onDidChangeFontTokens: Emitter<IModelFontTokensChangedEvent> = this._register(new Emitter<IModelFontTokensChangedEvent>());
	public readonly onDidChangeFontTokens: Event<IModelFontTokensChangedEvent> = this._onDidChangeFontTokens.event;

	/** The active syntax token backend; re-derived when `_useTreeSitter` changes. */
	public readonly tokens: IObservable<AbstractSyntaxTokenBackend>;
	/** Whether the tree-sitter library service reports support for the current language id. */
	private readonly _useTreeSitter: IObservable<boolean>;
	/** Observable mirror of `_languageId`, updated in `setLanguageId`. */
	private readonly _languageIdObs: ISettableObservable<string>;

	/**
	 * @param _textModel The owning text model.
	 * @param _bracketPairsTextModelPart Notified about token, language and background tokenization state changes.
	 * @param _languageId The initial language id of the model.
	 * @param _attachedViews Passed on to the syntax token backends.
	 */
	constructor(
		private readonly _textModel: TextModel,
		private readonly _bracketPairsTextModelPart: BracketPairsTextModelPart,
		private _languageId: string,
		private readonly _attachedViews: AttachedViews,
		@ILanguageService private readonly _languageService: ILanguageService,
		@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@ITreeSitterLibraryService private readonly _treeSitterLibraryService: ITreeSitterLibraryService,
	) {
		super();

		this._languageIdObs = observableValue(this, this._languageId);

		this._useTreeSitter = derived(this, reader => {
			const languageId = this._languageIdObs.read(reader);
			return this._treeSitterLibraryService.supportsLanguage(languageId, reader);
		});

		// Pick the backend and wire up its events. The backend and every subscription
		// are added to `reader.store`, i.e. they are owned by this derived computation.
		// NOTE(review): the language/token emitters used by these callbacks are only
		// created further down in this constructor; this presumably relies on backends
		// not firing events synchronously while being constructed - confirm.
		this.tokens = derived(this, reader => {
			let tokens: AbstractSyntaxTokenBackend;
			if (this._useTreeSitter.read(reader)) {
				tokens = reader.store.add(this._instantiationService.createInstance(
					TreeSitterSyntaxTokenBackend,
					this._languageIdObs,
					this._languageService.languageIdCodec,
					this._textModel,
					this._attachedViews.visibleLineRanges
				));
			} else {
				tokens = reader.store.add(new TokenizerSyntaxTokenBackend(this._languageService.languageIdCodec, this._textModel, () => this._languageId, this._attachedViews));
			}

			reader.store.add(tokens.onDidChangeTokens(e => {
				this._emitModelTokensChangedEvent(e);
			}));
			reader.store.add(tokens.onDidChangeFontTokens(e => {
				if (!this._textModel._isDisposing()) {
					this._onDidChangeFontTokens.fire(e);
				}
			}));

			reader.store.add(tokens.onDidChangeBackgroundTokenizationState(() => {
				this._bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
			}));
			return tokens;
		});

		let hadTokens = false;
		this.tokens.recomputeInitiallyAndOnChange(this._store, value => {
			if (hadTokens) {
				// We need to reset the tokenization, as the new token provider otherwise won't have a chance to provide tokens until some action happens in the editor.
				// TODO@hediet: Look into why this is needed.
				value.todo_resetTokenization();
			}
			hadTokens = true;
		});

		this._semanticTokens = new SparseTokensStore(this._languageService.languageIdCodec);
		this._onDidChangeLanguage = this._register(new Emitter<IModelLanguageChangedEvent>());
		this.onDidChangeLanguage = this._onDidChangeLanguage.event;
		this._onDidChangeLanguageConfiguration = this._register(new Emitter<IModelLanguageConfigurationChangedEvent>());
		this.onDidChangeLanguageConfiguration = this._onDidChangeLanguageConfiguration.event;
		this._onDidChangeTokens = this._register(new Emitter<IModelTokensChangedEvent>());
		this.onDidChangeTokens = this._onDidChangeTokens.event;
	}

	/**
	 * Reports whether any of the language, language configuration or token events has a listener.
	 */
	_hasListeners(): boolean {
		// Note: _onDidChangeFontTokens is intentionally excluded because it's an internal event
		// that TokenizationFontDecorationProvider subscribes to during TextModel construction
		return (this._onDidChangeLanguage.hasListeners()
			|| this._onDidChangeLanguageConfiguration.hasListeners()
			|| this._onDidChangeTokens.hasListeners());
	}

	/**
	 * Fires `onDidChangeLanguageConfiguration` when the service change affects this model's language id.
	 */
	public handleLanguageConfigurationServiceChange(e: LanguageConfigurationServiceChangeEvent): void {
		if (e.affects(this._languageId)) {
			this._onDidChangeLanguageConfiguration.fire({});
		}
	}

	/**
	 * Keeps the semantic token store in sync with a content change and then forwards
	 * the event to the active syntax token backend.
	 */
	public handleDidChangeContent(e: IModelContentChangedEvent): void {
		if (e.isFlush) {
			this._semanticTokens.flush();
		} else if (!e.isEolChange) { // We don't have to do anything on an EOL change
			for (const c of e.changes) {
				const [eolCount, firstLineLength, lastLineLength] = countEOL(c.text);

				this._semanticTokens.acceptEdit(
					c.range,
					eolCount,
					firstLineLength,
					lastLineLength,
					c.text.length > 0 ? c.text.charCodeAt(0) : CharCode.Null
				);
			}
		}

		this.tokens.get().handleDidChangeContent(e);
	}

	/** Forwards the notification to the active syntax token backend. */
	public handleDidChangeAttached(): void {
		this.tokens.get().handleDidChangeAttached();
	}

	/**
	 * Includes grammar and semantic tokens.
	 *
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	public getLineTokens(lineNumber: number): LineTokens {
		this.validateLineNumber(lineNumber);
		const syntacticTokens = this.tokens.get().getLineTokens(lineNumber);
		return this._semanticTokens.addSparseTokens(lineNumber, syntacticTokens);
	}

	/**
	 * Notifies the bracket pairs part and fires `onDidChangeTokens`, unless the text
	 * model is currently disposing.
	 */
	private _emitModelTokensChangedEvent(e: IModelTokensChangedEvent): void {
		if (!this._textModel._isDisposing()) {
			this._bracketPairsTextModelPart.handleDidChangeTokens(e);
			this._onDidChangeTokens.fire(e);
		}
	}

	// #region Grammar Tokens

	/**
	 * @throws BugIndicatingError if `lineNumber` is outside `[1, lineCount]`.
	 */
	private validateLineNumber(lineNumber: number): void {
		if (lineNumber < 1 || lineNumber > this._textModel.getLineCount()) {
			throw new BugIndicatingError('Illegal value for lineNumber');
		}
	}

	public get hasTokens(): boolean {
		return this.tokens.get().hasTokens;
	}

	/** Asks the active backend to reset its tokenization. */
	public resetTokenization() {
		this.tokens.get().todo_resetTokenization();
	}

	public get backgroundTokenizationState() {
		return this.tokens.get().backgroundTokenizationState;
	}

	/** Validates `lineNumber`, then delegates to the active backend. */
	public forceTokenization(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().forceTokenization(lineNumber);
	}

	/** Validates `lineNumber`, then delegates to the active backend. */
	public hasAccurateTokensForLine(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().hasAccurateTokensForLine(lineNumber);
	}

	/** Validates `lineNumber`, then delegates to the active backend. */
	public isCheapToTokenize(lineNumber: number): boolean {
		this.validateLineNumber(lineNumber);
		return this.tokens.get().isCheapToTokenize(lineNumber);
	}

	/** Validates `lineNumber`, then delegates to the active backend. */
	public tokenizeIfCheap(lineNumber: number): void {
		this.validateLineNumber(lineNumber);
		this.tokens.get().tokenizeIfCheap(lineNumber);
	}

	/** Delegates to the active backend; the arguments are passed through without validation here. */
	public getTokenTypeIfInsertingCharacter(lineNumber: number, column: number, character: string): StandardTokenType {
		return this.tokens.get().getTokenTypeIfInsertingCharacter(lineNumber, column, character);
	}

	/** Delegates to the active backend; the arguments are passed through without validation here. */
	public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {
		return this.tokens.get().tokenizeLinesAt(lineNumber, lines);
	}

	// #endregion

	// #region Semantic Tokens

	/**
	 * Replaces the stored semantic tokens and emits a tokens-changed event covering
	 * every line of the model.
	 */
	public setSemanticTokens(tokens: SparseMultilineTokens[] | null, isComplete: boolean): void {
		this._semanticTokens.set(tokens, isComplete, this._textModel);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: tokens !== null,
			ranges: [{ fromLineNumber: 1, toLineNumber: this._textModel.getLineCount() }],
		});
	}

	public hasCompleteSemanticTokens(): boolean {
		return this._semanticTokens.isComplete();
	}

	public hasSomeSemanticTokens(): boolean {
		return !this._semanticTokens.isEmpty();
	}

	/**
	 * Stores semantic tokens for `range` and emits a tokens-changed event for the
	 * affected lines. Does nothing when complete semantic tokens are already present.
	 */
	public setPartialSemanticTokens(range: Range, tokens: SparseMultilineTokens[]): void {
		if (this.hasCompleteSemanticTokens()) {
			return;
		}
		const changedRange = this._textModel.validateRange(
			this._semanticTokens.setPartial(range, tokens)
		);

		this._emitModelTokensChangedEvent({
			semanticTokensApplied: true,
			ranges: [
				{
					fromLineNumber: changedRange.startLineNumber,
					toLineNumber: changedRange.endLineNumber,
				},
			],
		});
	}

	// #endregion

	// #region Utility Methods

	/**
	 * Finds the word touching `_position`, using the word definition of the language
	 * of the token at that position. Returns `null` when no word touches the position.
	 */
	public getWordAtPosition(_position: IPosition): IWordAtPosition | null {
		this.assertNotDisposed();

		const position = this._textModel.validatePosition(_position);
		const lineContent = this._textModel.getLineContent(position.lineNumber);
		const lineTokens = this.getLineTokens(position.lineNumber);
		const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);

		// (1). First try checking right biased word
		const [rbStartOffset, rbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(lineTokens, tokenIndex);
		const rightBiasedWord = getWordAtText(
			position.column,
			this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex)).getWordDefinition(),
			lineContent.substring(rbStartOffset, rbEndOffset),
			rbStartOffset
		);
		// Make sure the result touches the original passed in position
		if (
			rightBiasedWord &&
			rightBiasedWord.startColumn <= _position.column &&
			_position.column <= rightBiasedWord.endColumn
		) {
			return rightBiasedWord;
		}

		// (2). Else, if we were at a language boundary, check the left biased word
		if (tokenIndex > 0 && rbStartOffset === position.column - 1) {
			// edge case, where `position` sits between two tokens belonging to two different languages
			const [lbStartOffset, lbEndOffset] = TokenizationTextModelPart._findLanguageBoundaries(
				lineTokens,
				tokenIndex - 1
			);
			const leftBiasedWord = getWordAtText(
				position.column,
				this.getLanguageConfiguration(lineTokens.getLanguageId(tokenIndex - 1)).getWordDefinition(),
				lineContent.substring(lbStartOffset, lbEndOffset),
				lbStartOffset
			);
			// Make sure the result touches the original passed in position
			if (
				leftBiasedWord &&
				leftBiasedWord.startColumn <= _position.column &&
				_position.column <= leftBiasedWord.endColumn
			) {
				return leftBiasedWord;
			}
		}

		return null;
	}

	private getLanguageConfiguration(languageId: string): ResolvedLanguageConfiguration {
		return this._languageConfigurationService.getLanguageConfiguration(languageId);
	}

	/**
	 * Returns `[startOffset, endOffset]` of the run of consecutive tokens around
	 * `tokenIndex` that share its language id.
	 */
	private static _findLanguageBoundaries(lineTokens: LineTokens, tokenIndex: number): [number, number] {
		const languageId = lineTokens.getLanguageId(tokenIndex);

		// go left until a different language is hit
		let startOffset = 0;
		for (let i = tokenIndex; i >= 0 && lineTokens.getLanguageId(i) === languageId; i--) {
			startOffset = lineTokens.getStartOffset(i);
		}

		// go right until a different language is hit
		let endOffset = lineTokens.getLineContent().length;
		for (
			let i = tokenIndex, tokenCount = lineTokens.getCount();
			i < tokenCount && lineTokens.getLanguageId(i) === languageId;
			i++
		) {
			endOffset = lineTokens.getEndOffset(i);
		}

		return [startOffset, endOffset];
	}

	/**
	 * Returns the part of the word at `position` that lies before `position.column`.
	 * When no word touches the position, an empty word located at `position.column` is returned.
	 */
	public getWordUntilPosition(position: IPosition): IWordAtPosition {
		const wordAtPosition = this.getWordAtPosition(position);
		if (!wordAtPosition) {
			return { word: '', startColumn: position.column, endColumn: position.column };
		}
		return {
			// `getWordAtPosition` only returns words with startColumn <= position.column,
			// so the end index is never negative.
			word: wordAtPosition.word.substring(0, position.column - wordAtPosition.startColumn),
			startColumn: wordAtPosition.startColumn,
			endColumn: position.column,
		};
	}

	// #endregion

	// #region Language Id handling

	public getLanguageId(): string {
		return this._languageId;
	}

	/**
	 * Returns the language id of the token at the given (validated) position.
	 */
	public getLanguageIdAtPosition(lineNumber: number, column: number): string {
		const position = this._textModel.validatePosition(new Position(lineNumber, column));
		const lineTokens = this.getLineTokens(position.lineNumber);
		return lineTokens.getLanguageId(lineTokens.findTokenIndexAtOffset(position.column - 1));
	}

	/**
	 * Changes the language id. Does nothing when `languageId` equals the current one;
	 * otherwise updates the observable, notifies the bracket pairs part and fires
	 * `onDidChangeLanguage` followed by `onDidChangeLanguageConfiguration`.
	 */
	public setLanguageId(languageId: string, source: string = 'api'): void {
		if (this._languageId === languageId) {
			// There's nothing to do
			return;
		}

		const e: IModelLanguageChangedEvent = {
			oldLanguage: this._languageId,
			newLanguage: languageId,
			source
		};

		this._languageId = languageId;
		this._languageIdObs.set(languageId, undefined);
		this._bracketPairsTextModelPart.handleDidChangeLanguage(e);

		this._onDidChangeLanguage.fire(e);
		this._onDidChangeLanguageConfiguration.fire({});
	}

	// #endregion
}