Path: blob/main/src/vs/editor/common/model/textModelTokens.ts
3294 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { IdleDeadline, runWhenGlobalIdle } from '../../../base/common/async.js';6import { BugIndicatingError, onUnexpectedError } from '../../../base/common/errors.js';7import { setTimeout0 } from '../../../base/common/platform.js';8import { StopWatch } from '../../../base/common/stopwatch.js';9import { countEOL } from '../core/misc/eolCounter.js';10import { LineRange } from '../core/ranges/lineRange.js';11import { OffsetRange } from '../core/ranges/offsetRange.js';12import { Position } from '../core/position.js';13import { StandardTokenType } from '../encodedTokenAttributes.js';14import { EncodedTokenizationResult, IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport } from '../languages.js';15import { nullTokenizeEncoded } from '../languages/nullTokenize.js';16import { ITextModel } from '../model.js';17import { FixedArray } from './fixedArray.js';18import { IModelContentChange } from './mirrorTextModel.js';19import { ContiguousMultilineTokensBuilder } from '../tokens/contiguousMultilineTokensBuilder.js';20import { LineTokens } from '../tokens/lineTokens.js';2122const enum Constants {23CHEAP_TOKENIZATION_LENGTH_LIMIT = 204824}2526export class TokenizerWithStateStore<TState extends IState = IState> {27private readonly initialState;2829public readonly store: TrackingTokenizationStateStore<TState>;3031constructor(32lineCount: number,33public readonly tokenizationSupport: ITokenizationSupport34) {35this.initialState = this.tokenizationSupport.getInitialState() as TState;36this.store = new TrackingTokenizationStateStore<TState>(lineCount);37}3839public getStartState(lineNumber: number): TState | null {40return this.store.getStartState(lineNumber, this.initialState);41}4243public getFirstInvalidLine(): { lineNumber: number; startState: TState } | null {44return this.store.getFirstInvalidLine(this.initialState);45}46}4748export class TokenizerWithStateStoreAndTextModel<TState extends IState = IState> extends TokenizerWithStateStore<TState> {49constructor(50lineCount: number,51tokenizationSupport: ITokenizationSupport,52public readonly _textModel: ITextModel,53public readonly _languageIdCodec: ILanguageIdCodec54) {55super(lineCount, tokenizationSupport);56}5758public updateTokensUntilLine(builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {59const languageId = this._textModel.getLanguageId();6061while (true) {62const lineToTokenize = this.getFirstInvalidLine();63if (!lineToTokenize || lineToTokenize.lineNumber > lineNumber) {64break;65}6667const text = this._textModel.getLineContent(lineToTokenize.lineNumber);6869const r = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, text, true, lineToTokenize.startState);70builder.add(lineToTokenize.lineNumber, r.tokens);71this.store.setEndState(lineToTokenize.lineNumber, r.endState as TState);72}73}7475/** assumes state is up to date */76public getTokenTypeIfInsertingCharacter(position: Position, character: string): StandardTokenType {77// TODO@hediet: use tokenizeLineWithEdit78const lineStartState = this.getStartState(position.lineNumber);79if (!lineStartState) {80return StandardTokenType.Other;81}8283const languageId = this._textModel.getLanguageId();84const lineContent = this._textModel.getLineContent(position.lineNumber);8586// Create the text as if `character` was inserted87const text = (88lineContent.substring(0, position.column - 1)89+ character90+ lineContent.substring(position.column - 1)91);9293const r = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, text, true, lineStartState);94const lineTokens = new LineTokens(r.tokens, text, this._languageIdCodec);95if (lineTokens.getCount() === 0) {96return StandardTokenType.Other;97}9899const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);100return lineTokens.getStandardTokenType(tokenIndex);101}102103/** assumes state is up to date */104public tokenizeLinesAt(lineNumber: number, lines: string[]): LineTokens[] | null {105const lineStartState: IState | null = this.getStartState(lineNumber);106if (!lineStartState) {107return null;108}109110const languageId = this._textModel.getLanguageId();111const result: LineTokens[] = [];112113let state = lineStartState;114for (const line of lines) {115const r = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, line, true, state);116result.push(new LineTokens(r.tokens, line, this._languageIdCodec));117state = r.endState;118}119120return result;121}122123public hasAccurateTokensForLine(lineNumber: number): boolean {124const firstInvalidLineNumber = this.store.getFirstInvalidEndStateLineNumberOrMax();125return (lineNumber < firstInvalidLineNumber);126}127128public isCheapToTokenize(lineNumber: number): boolean {129const firstInvalidLineNumber = this.store.getFirstInvalidEndStateLineNumberOrMax();130if (lineNumber < firstInvalidLineNumber) {131return true;132}133if (lineNumber === firstInvalidLineNumber134&& this._textModel.getLineLength(lineNumber) < Constants.CHEAP_TOKENIZATION_LENGTH_LIMIT) {135return true;136}137138return false;139}140141/**142* The result is not cached.143*/144public tokenizeHeuristically(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): { heuristicTokens: boolean } {145if (endLineNumber <= this.store.getFirstInvalidEndStateLineNumberOrMax()) {146// nothing to do147return { heuristicTokens: false };148}149150if (startLineNumber <= this.store.getFirstInvalidEndStateLineNumberOrMax()) {151// tokenization has reached the viewport start...152this.updateTokensUntilLine(builder, endLineNumber);153return { heuristicTokens: false };154}155156let state = this.guessStartState(startLineNumber);157const languageId = this._textModel.getLanguageId();158159for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {160const text = this._textModel.getLineContent(lineNumber);161const r = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, text, true, state);162builder.add(lineNumber, r.tokens);163state = r.endState;164}165166return { heuristicTokens: true };167}168169private guessStartState(lineNumber: number): IState {170let { likelyRelevantLines, initialState } = findLikelyRelevantLines(this._textModel, lineNumber, this);171172if (!initialState) {173initialState = this.tokenizationSupport.getInitialState();174}175176const languageId = this._textModel.getLanguageId();177let state = initialState;178for (const line of likelyRelevantLines) {179const r = safeTokenize(this._languageIdCodec, languageId, this.tokenizationSupport, line, false, state);180state = r.endState;181}182return state;183}184}185186export function findLikelyRelevantLines(model: ITextModel, lineNumber: number, store?: TokenizerWithStateStore): { likelyRelevantLines: string[]; initialState?: IState } {187let nonWhitespaceColumn = model.getLineFirstNonWhitespaceColumn(lineNumber);188const likelyRelevantLines: string[] = [];189let initialState: IState | null | undefined = null;190for (let i = lineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {191const newNonWhitespaceIndex = model.getLineFirstNonWhitespaceColumn(i);192// Ignore lines full of whitespace193if (newNonWhitespaceIndex === 0) {194continue;195}196if (newNonWhitespaceIndex < nonWhitespaceColumn) {197likelyRelevantLines.push(model.getLineContent(i));198nonWhitespaceColumn = newNonWhitespaceIndex;199initialState = store?.getStartState(i);200if (initialState) {201break;202}203}204}205206likelyRelevantLines.reverse();207return { likelyRelevantLines, initialState: initialState ?? undefined };208}209210/**211* **Invariant:**212* If the text model is retokenized from line 1 to {@link getFirstInvalidEndStateLineNumber}() - 1,213* then the recomputed end state for line l will be equal to {@link getEndState}(l).214*/215export class TrackingTokenizationStateStore<TState extends IState> {216private readonly _tokenizationStateStore = new TokenizationStateStore<TState>();217private readonly _invalidEndStatesLineNumbers = new RangePriorityQueueImpl();218219constructor(private lineCount: number) {220this._invalidEndStatesLineNumbers.addRange(new OffsetRange(1, lineCount + 1));221}222223public getEndState(lineNumber: number): TState | null {224return this._tokenizationStateStore.getEndState(lineNumber);225}226227/**228* @returns if the end state has changed.229*/230public setEndState(lineNumber: number, state: TState): boolean {231if (!state) {232throw new BugIndicatingError('Cannot set null/undefined state');233}234235this._invalidEndStatesLineNumbers.delete(lineNumber);236const r = this._tokenizationStateStore.setEndState(lineNumber, state);237if (r && lineNumber < this.lineCount) {238// because the state changed, we cannot trust the next state anymore and have to invalidate it.239this._invalidEndStatesLineNumbers.addRange(new OffsetRange(lineNumber + 1, lineNumber + 2));240}241242return r;243}244245public acceptChange(range: LineRange, newLineCount: number): void {246this.lineCount += newLineCount - range.length;247this._tokenizationStateStore.acceptChange(range, newLineCount);248this._invalidEndStatesLineNumbers.addRangeAndResize(new OffsetRange(range.startLineNumber, range.endLineNumberExclusive), newLineCount);249}250251public acceptChanges(changes: IModelContentChange[]) {252for (const c of changes) {253const [eolCount] = countEOL(c.text);254this.acceptChange(new LineRange(c.range.startLineNumber, c.range.endLineNumber + 1), eolCount + 1);255}256}257258public invalidateEndStateRange(range: LineRange): void {259this._invalidEndStatesLineNumbers.addRange(new OffsetRange(range.startLineNumber, range.endLineNumberExclusive));260}261262public getFirstInvalidEndStateLineNumber(): number | null { return this._invalidEndStatesLineNumbers.min; }263264public getFirstInvalidEndStateLineNumberOrMax(): number {265return this.getFirstInvalidEndStateLineNumber() || Number.MAX_SAFE_INTEGER;266}267268public allStatesValid(): boolean { return this._invalidEndStatesLineNumbers.min === null; }269270public getStartState(lineNumber: number, initialState: TState): TState | null {271if (lineNumber === 1) { return initialState; }272return this.getEndState(lineNumber - 1);273}274275public getFirstInvalidLine(initialState: TState): { lineNumber: number; startState: TState } | null {276const lineNumber = this.getFirstInvalidEndStateLineNumber();277if (lineNumber === null) {278return null;279}280const startState = this.getStartState(lineNumber, initialState);281if (!startState) {282throw new BugIndicatingError('Start state must be defined');283}284285return { lineNumber, startState };286}287}288289export class TokenizationStateStore<TState extends IState> {290private readonly _lineEndStates = new FixedArray<TState | null>(null);291292public getEndState(lineNumber: number): TState | null {293return this._lineEndStates.get(lineNumber);294}295296public setEndState(lineNumber: number, state: TState): boolean {297const oldState = this._lineEndStates.get(lineNumber);298if (oldState && oldState.equals(state)) {299return false;300}301302this._lineEndStates.set(lineNumber, state);303return true;304}305306public acceptChange(range: LineRange, newLineCount: number): void {307let length = range.length;308if (newLineCount > 0 && length > 0) {309// Keep the last state, even though it is unrelated.310// But if the new state happens to agree with this last state, then we know we can stop tokenizing.311length--;312newLineCount--;313}314315this._lineEndStates.replace(range.startLineNumber, length, newLineCount);316}317318public acceptChanges(changes: IModelContentChange[]) {319for (const c of changes) {320const [eolCount] = countEOL(c.text);321this.acceptChange(new LineRange(c.range.startLineNumber, c.range.endLineNumber + 1), eolCount + 1);322}323}324}325326interface RangePriorityQueue {327get min(): number | null;328removeMin(): number | null;329330addRange(range: OffsetRange): void;331332addRangeAndResize(range: OffsetRange, newLength: number): void;333}334335export class RangePriorityQueueImpl implements RangePriorityQueue {336private readonly _ranges: OffsetRange[] = [];337338public getRanges(): OffsetRange[] {339return this._ranges;340}341342public get min(): number | null {343if (this._ranges.length === 0) {344return null;345}346return this._ranges[0].start;347}348349public removeMin(): number | null {350if (this._ranges.length === 0) {351return null;352}353const range = this._ranges[0];354if (range.start + 1 === range.endExclusive) {355this._ranges.shift();356} else {357this._ranges[0] = new OffsetRange(range.start + 1, range.endExclusive);358}359return range.start;360}361362public delete(value: number): void {363const idx = this._ranges.findIndex(r => r.contains(value));364if (idx !== -1) {365const range = this._ranges[idx];366if (range.start === value) {367if (range.endExclusive === value + 1) {368this._ranges.splice(idx, 1);369} else {370this._ranges[idx] = new OffsetRange(value + 1, range.endExclusive);371}372} else {373if (range.endExclusive === value + 1) {374this._ranges[idx] = new OffsetRange(range.start, value);375} else {376this._ranges.splice(idx, 1, new OffsetRange(range.start, value), new OffsetRange(value + 1, range.endExclusive));377}378}379}380}381382public addRange(range: OffsetRange): void {383OffsetRange.addRange(range, this._ranges);384}385386public addRangeAndResize(range: OffsetRange, newLength: number): void {387let idxFirstMightBeIntersecting = 0;388while (!(idxFirstMightBeIntersecting >= this._ranges.length || range.start <= this._ranges[idxFirstMightBeIntersecting].endExclusive)) {389idxFirstMightBeIntersecting++;390}391let idxFirstIsAfter = idxFirstMightBeIntersecting;392while (!(idxFirstIsAfter >= this._ranges.length || range.endExclusive < this._ranges[idxFirstIsAfter].start)) {393idxFirstIsAfter++;394}395const delta = newLength - range.length;396397for (let i = idxFirstIsAfter; i < this._ranges.length; i++) {398this._ranges[i] = this._ranges[i].delta(delta);399}400401if (idxFirstMightBeIntersecting === idxFirstIsAfter) {402const newRange = new OffsetRange(range.start, range.start + newLength);403if (!newRange.isEmpty) {404this._ranges.splice(idxFirstMightBeIntersecting, 0, newRange);405}406} else {407const start = Math.min(range.start, this._ranges[idxFirstMightBeIntersecting].start);408const endEx = Math.max(range.endExclusive, this._ranges[idxFirstIsAfter - 1].endExclusive);409410const newRange = new OffsetRange(start, endEx + delta);411if (!newRange.isEmpty) {412this._ranges.splice(idxFirstMightBeIntersecting, idxFirstIsAfter - idxFirstMightBeIntersecting, newRange);413} else {414this._ranges.splice(idxFirstMightBeIntersecting, idxFirstIsAfter - idxFirstMightBeIntersecting);415}416}417}418419toString() {420return this._ranges.map(r => r.toString()).join(' + ');421}422}423424425function safeTokenize(languageIdCodec: ILanguageIdCodec, languageId: string, tokenizationSupport: ITokenizationSupport | null, text: string, hasEOL: boolean, state: IState): EncodedTokenizationResult {426let r: EncodedTokenizationResult | null = null;427428if (tokenizationSupport) {429try {430r = tokenizationSupport.tokenizeEncoded(text, hasEOL, state.clone());431} catch (e) {432onUnexpectedError(e);433}434}435436if (!r) {437r = nullTokenizeEncoded(languageIdCodec.encodeLanguageId(languageId), state);438}439440LineTokens.convertToEndOffset(r.tokens, text.length);441return r;442}443444export class DefaultBackgroundTokenizer implements IBackgroundTokenizer {445private _isDisposed = false;446447constructor(448private readonly _tokenizerWithStateStore: TokenizerWithStateStoreAndTextModel,449private readonly _backgroundTokenStore: IBackgroundTokenizationStore,450) {451}452453public dispose(): void {454this._isDisposed = true;455}456457public handleChanges(): void {458this._beginBackgroundTokenization();459}460461private _isScheduled = false;462private _beginBackgroundTokenization(): void {463if (this._isScheduled || !this._tokenizerWithStateStore._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {464return;465}466467this._isScheduled = true;468runWhenGlobalIdle((deadline) => {469this._isScheduled = false;470471this._backgroundTokenizeWithDeadline(deadline);472});473}474475/**476* Tokenize until the deadline occurs, but try to yield every 1-2ms.477*/478private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {479// Read the time remaining from the `deadline` immediately because it is unclear480// if the `deadline` object will be valid after execution leaves this function.481const endTime = Date.now() + deadline.timeRemaining();482483const execute = () => {484if (this._isDisposed || !this._tokenizerWithStateStore._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {485// disposed in the meantime or detached or finished486return;487}488489this._backgroundTokenizeForAtLeast1ms();490491if (Date.now() < endTime) {492// There is still time before reaching the deadline, so yield to the browser and then493// continue execution494setTimeout0(execute);495} else {496// The deadline has been reached, so schedule a new idle callback if necessary497this._beginBackgroundTokenization();498}499};500execute();501}502503/**504* Tokenize for at least 1ms.505*/506private _backgroundTokenizeForAtLeast1ms(): void {507const lineCount = this._tokenizerWithStateStore._textModel.getLineCount();508const builder = new ContiguousMultilineTokensBuilder();509const sw = StopWatch.create(false);510511do {512if (sw.elapsed() > 1) {513// the comparison is intentionally > 1 and not >= 1 to ensure that514// a full millisecond has elapsed, given how microseconds are rounded515// to milliseconds516break;517}518519const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);520521if (tokenizedLineNumber >= lineCount) {522break;523}524} while (this._hasLinesToTokenize());525526this._backgroundTokenStore.setTokens(builder.finalize());527this.checkFinished();528}529530private _hasLinesToTokenize(): boolean {531if (!this._tokenizerWithStateStore) {532return false;533}534return !this._tokenizerWithStateStore.store.allStatesValid();535}536537private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {538const firstInvalidLine = this._tokenizerWithStateStore?.getFirstInvalidLine();539if (!firstInvalidLine) {540return this._tokenizerWithStateStore._textModel.getLineCount() + 1;541}542this._tokenizerWithStateStore.updateTokensUntilLine(builder, firstInvalidLine.lineNumber);543return firstInvalidLine.lineNumber;544}545546public checkFinished(): void {547if (this._isDisposed) {548return;549}550if (this._tokenizerWithStateStore.store.allStatesValid()) {551this._backgroundTokenStore.backgroundTokenizationFinished();552}553}554555public requestTokens(startLineNumber: number, endLineNumberExclusive: number): void {556this._tokenizerWithStateStore.store.invalidateEndStateRange(new LineRange(startLineNumber, endLineNumberExclusive));557}558}559560561