Path: blob/main/src/vs/editor/common/tokens/sparseMultilineTokens.ts
5275 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { CharCode } from '../../../base/common/charCode.js';6import { Position } from '../core/position.js';7import { IRange, Range } from '../core/range.js';8import { countEOL } from '../core/misc/eolCounter.js';9import { ITextModel } from '../model.js';10import { RateLimiter } from './common.js';1112/**13* Represents sparse tokens over a contiguous range of lines.14*/15export class SparseMultilineTokens {1617public static create(startLineNumber: number, tokens: Uint32Array): SparseMultilineTokens {18return new SparseMultilineTokens(startLineNumber, new SparseMultilineTokensStorage(tokens));19}2021private _startLineNumber: number;22private _endLineNumber: number;23private readonly _tokens: SparseMultilineTokensStorage;2425/**26* (Inclusive) start line number for these tokens.27*/28public get startLineNumber(): number {29return this._startLineNumber;30}3132/**33* (Inclusive) end line number for these tokens.34*/35public get endLineNumber(): number {36return this._endLineNumber;37}3839private constructor(startLineNumber: number, tokens: SparseMultilineTokensStorage) {40this._startLineNumber = startLineNumber;41this._tokens = tokens;42this._endLineNumber = this._startLineNumber + this._tokens.getMaxDeltaLine();43}4445public toString(): string {46return this._tokens.toString(this._startLineNumber);47}4849private _updateEndLineNumber(): void {50this._endLineNumber = this._startLineNumber + this._tokens.getMaxDeltaLine();51}5253public isEmpty(): boolean {54return this._tokens.isEmpty();55}5657public getLineTokens(lineNumber: number): SparseLineTokens | null {58if (this._startLineNumber <= lineNumber && lineNumber <= this._endLineNumber) {59return this._tokens.getLineTokens(lineNumber - this._startLineNumber);60}61return null;62}6364public getRange(): Range | null {65const deltaRange = this._tokens.getRange();66if (!deltaRange) {67return deltaRange;68}69return new Range(this._startLineNumber + deltaRange.startLineNumber, deltaRange.startColumn, this._startLineNumber + deltaRange.endLineNumber, deltaRange.endColumn);70}7172public removeTokens(range: Range): void {73const startLineIndex = range.startLineNumber - this._startLineNumber;74const endLineIndex = range.endLineNumber - this._startLineNumber;7576this._startLineNumber += this._tokens.removeTokens(startLineIndex, range.startColumn - 1, endLineIndex, range.endColumn - 1);77this._updateEndLineNumber();78}7980public split(range: Range): [SparseMultilineTokens, SparseMultilineTokens] {81// split tokens to two:82// a) all the tokens before `range`83// b) all the tokens after `range`84const startLineIndex = range.startLineNumber - this._startLineNumber;85const endLineIndex = range.endLineNumber - this._startLineNumber;8687const [a, b, bDeltaLine] = this._tokens.split(startLineIndex, range.startColumn - 1, endLineIndex, range.endColumn - 1);88return [new SparseMultilineTokens(this._startLineNumber, a), new SparseMultilineTokens(this._startLineNumber + bDeltaLine, b)];89}9091public applyEdit(range: IRange, text: string): void {92const [eolCount, firstLineLength, lastLineLength] = countEOL(text);93this.acceptEdit(range, eolCount, firstLineLength, lastLineLength, text.length > 0 ? text.charCodeAt(0) : CharCode.Null);94}9596public acceptEdit(range: IRange, eolCount: number, firstLineLength: number, lastLineLength: number, firstCharCode: number): void {97this._acceptDeleteRange(range);98this._acceptInsertText(new Position(range.startLineNumber, range.startColumn), eolCount, firstLineLength, lastLineLength, firstCharCode);99this._updateEndLineNumber();100}101102private _acceptDeleteRange(range: IRange): void {103if (range.startLineNumber === range.endLineNumber && range.startColumn === range.endColumn) {104// Nothing to delete105return;106}107108const firstLineIndex = range.startLineNumber - this._startLineNumber;109const lastLineIndex = range.endLineNumber - this._startLineNumber;110111if (lastLineIndex < 0) {112// this deletion occurs entirely before this block, so we only need to adjust line numbers113const deletedLinesCount = lastLineIndex - firstLineIndex;114this._startLineNumber -= deletedLinesCount;115return;116}117118const tokenMaxDeltaLine = this._tokens.getMaxDeltaLine();119120if (firstLineIndex >= tokenMaxDeltaLine + 1) {121// this deletion occurs entirely after this block, so there is nothing to do122return;123}124125if (firstLineIndex < 0 && lastLineIndex >= tokenMaxDeltaLine + 1) {126// this deletion completely encompasses this block127this._startLineNumber = 0;128this._tokens.clear();129return;130}131132if (firstLineIndex < 0) {133const deletedBefore = -firstLineIndex;134this._startLineNumber -= deletedBefore;135136this._tokens.acceptDeleteRange(range.startColumn - 1, 0, 0, lastLineIndex, range.endColumn - 1);137} else {138this._tokens.acceptDeleteRange(0, firstLineIndex, range.startColumn - 1, lastLineIndex, range.endColumn - 1);139}140}141142private _acceptInsertText(position: Position, eolCount: number, firstLineLength: number, lastLineLength: number, firstCharCode: number): void {143144if (eolCount === 0 && firstLineLength === 0) {145// Nothing to insert146return;147}148149const lineIndex = position.lineNumber - this._startLineNumber;150151if (lineIndex < 0) {152// this insertion occurs before this block, so we only need to adjust line numbers153this._startLineNumber += eolCount;154return;155}156157const tokenMaxDeltaLine = this._tokens.getMaxDeltaLine();158159if (lineIndex >= tokenMaxDeltaLine + 1) {160// this insertion occurs after this block, so there is nothing to do161return;162}163164this._tokens.acceptInsertText(lineIndex, position.column - 1, eolCount, firstLineLength, lastLineLength, firstCharCode);165}166167public reportIfInvalid(model: ITextModel): void {168this._tokens.reportIfInvalid(model, this._startLineNumber);169}170}171172class SparseMultilineTokensStorage {173/**174* The encoding of tokens is:175* 4*i deltaLine (from `startLineNumber`)176* 4*i+1 startCharacter (from the line start)177* 4*i+2 endCharacter (from the line start)178* 4*i+3 metadata179*/180private readonly _tokens: Uint32Array;181private _tokenCount: number;182183constructor(tokens: Uint32Array) {184this._tokens = tokens;185this._tokenCount = tokens.length / 4;186}187188public toString(startLineNumber: number): string {189const pieces: string[] = [];190for (let i = 0; i < this._tokenCount; i++) {191pieces.push(`(${this._getDeltaLine(i) + startLineNumber},${this._getStartCharacter(i)}-${this._getEndCharacter(i)})`);192}193return `[${pieces.join(',')}]`;194}195196public getMaxDeltaLine(): number {197const tokenCount = this._getTokenCount();198if (tokenCount === 0) {199return -1;200}201return this._getDeltaLine(tokenCount - 1);202}203204public getRange(): Range | null {205const tokenCount = this._getTokenCount();206if (tokenCount === 0) {207return null;208}209const startChar = this._getStartCharacter(0);210const maxDeltaLine = this._getDeltaLine(tokenCount - 1);211const endChar = this._getEndCharacter(tokenCount - 1);212return new Range(0, startChar + 1, maxDeltaLine, endChar + 1);213}214215private _getTokenCount(): number {216return this._tokenCount;217}218219private _getDeltaLine(tokenIndex: number): number {220return this._tokens[4 * tokenIndex];221}222223private _getStartCharacter(tokenIndex: number): number {224return this._tokens[4 * tokenIndex + 1];225}226227private _getEndCharacter(tokenIndex: number): number {228return this._tokens[4 * tokenIndex + 2];229}230231public isEmpty(): boolean {232return (this._getTokenCount() === 0);233}234235public getLineTokens(deltaLine: number): SparseLineTokens | null {236let low = 0;237let high = this._getTokenCount() - 1;238239while (low < high) {240const mid = low + Math.floor((high - low) / 2);241const midDeltaLine = this._getDeltaLine(mid);242243if (midDeltaLine < deltaLine) {244low = mid + 1;245} else if (midDeltaLine > deltaLine) {246high = mid - 1;247} else {248let min = mid;249while (min > low && this._getDeltaLine(min - 1) === deltaLine) {250min--;251}252let max = mid;253while (max < high && this._getDeltaLine(max + 1) === deltaLine) {254max++;255}256return new SparseLineTokens(this._tokens.subarray(4 * min, 4 * max + 4));257}258}259260if (this._getDeltaLine(low) === deltaLine) {261return new SparseLineTokens(this._tokens.subarray(4 * low, 4 * low + 4));262}263264return null;265}266267public clear(): void {268this._tokenCount = 0;269}270271public removeTokens(startDeltaLine: number, startChar: number, endDeltaLine: number, endChar: number): number {272const tokens = this._tokens;273const tokenCount = this._tokenCount;274let newTokenCount = 0;275let hasDeletedTokens = false;276let firstDeltaLine = 0;277for (let i = 0; i < tokenCount; i++) {278const srcOffset = 4 * i;279const tokenDeltaLine = tokens[srcOffset];280const tokenStartCharacter = tokens[srcOffset + 1];281const tokenEndCharacter = tokens[srcOffset + 2];282const tokenMetadata = tokens[srcOffset + 3];283284if (285(tokenDeltaLine > startDeltaLine || (tokenDeltaLine === startDeltaLine && tokenEndCharacter >= startChar))286&& (tokenDeltaLine < endDeltaLine || (tokenDeltaLine === endDeltaLine && tokenStartCharacter <= endChar))287) {288hasDeletedTokens = true;289} else {290if (newTokenCount === 0) {291firstDeltaLine = tokenDeltaLine;292}293if (hasDeletedTokens) {294// must move the token to the left295const destOffset = 4 * newTokenCount;296tokens[destOffset] = tokenDeltaLine - firstDeltaLine;297tokens[destOffset + 1] = tokenStartCharacter;298tokens[destOffset + 2] = tokenEndCharacter;299tokens[destOffset + 3] = tokenMetadata;300} else if (firstDeltaLine !== 0) {301// must adjust the delta line in place302tokens[srcOffset] = tokenDeltaLine - firstDeltaLine;303}304newTokenCount++;305}306}307308this._tokenCount = newTokenCount;309310return firstDeltaLine;311}312313public split(startDeltaLine: number, startChar: number, endDeltaLine: number, endChar: number): [SparseMultilineTokensStorage, SparseMultilineTokensStorage, number] {314const tokens = this._tokens;315const tokenCount = this._tokenCount;316const aTokens: number[] = [];317const bTokens: number[] = [];318let destTokens: number[] = aTokens;319let destOffset = 0;320let destFirstDeltaLine: number = 0;321for (let i = 0; i < tokenCount; i++) {322const srcOffset = 4 * i;323const tokenDeltaLine = tokens[srcOffset];324const tokenStartCharacter = tokens[srcOffset + 1];325const tokenEndCharacter = tokens[srcOffset + 2];326const tokenMetadata = tokens[srcOffset + 3];327328if ((tokenDeltaLine > startDeltaLine || (tokenDeltaLine === startDeltaLine && tokenEndCharacter >= startChar))) {329if ((tokenDeltaLine < endDeltaLine || (tokenDeltaLine === endDeltaLine && tokenStartCharacter <= endChar))) {330// this token is touching the range331continue;332} else {333// this token is after the range334if (destTokens !== bTokens) {335// this token is the first token after the range336destTokens = bTokens;337destOffset = 0;338destFirstDeltaLine = tokenDeltaLine;339}340}341}342343destTokens[destOffset++] = tokenDeltaLine - destFirstDeltaLine;344destTokens[destOffset++] = tokenStartCharacter;345destTokens[destOffset++] = tokenEndCharacter;346destTokens[destOffset++] = tokenMetadata;347}348349return [new SparseMultilineTokensStorage(new Uint32Array(aTokens)), new SparseMultilineTokensStorage(new Uint32Array(bTokens)), destFirstDeltaLine];350}351352public acceptDeleteRange(horizontalShiftForFirstLineTokens: number, startDeltaLine: number, startCharacter: number, endDeltaLine: number, endCharacter: number): void {353// This is a bit complex, here are the cases I used to think about this:354//355// 1. The token starts before the deletion range356// 1a. The token is completely before the deletion range357// -----------358// xxxxxxxxxxx359// 1b. The token starts before, the deletion range ends after the token360// -----------361// xxxxxxxxxxx362// 1c. The token starts before, the deletion range ends precisely with the token363// ---------------364// xxxxxxxx365// 1d. The token starts before, the deletion range is inside the token366// ---------------367// xxxxx368//369// 2. The token starts at the same position with the deletion range370// 2a. The token starts at the same position, and ends inside the deletion range371// -------372// xxxxxxxxxxx373// 2b. The token starts at the same position, and ends at the same position as the deletion range374// ----------375// xxxxxxxxxx376// 2c. The token starts at the same position, and ends after the deletion range377// -------------378// xxxxxxx379//380// 3. The token starts inside the deletion range381// 3a. The token is inside the deletion range382// -------383// xxxxxxxxxxxxx384// 3b. The token starts inside the deletion range, and ends at the same position as the deletion range385// ----------386// xxxxxxxxxxxxx387// 3c. The token starts inside the deletion range, and ends after the deletion range388// ------------389// xxxxxxxxxxx390//391// 4. The token starts after the deletion range392// -----------393// xxxxxxxx394//395const tokens = this._tokens;396const tokenCount = this._tokenCount;397const deletedLineCount = (endDeltaLine - startDeltaLine);398let newTokenCount = 0;399let hasDeletedTokens = false;400for (let i = 0; i < tokenCount; i++) {401const srcOffset = 4 * i;402let tokenDeltaLine = tokens[srcOffset];403let tokenStartCharacter = tokens[srcOffset + 1];404let tokenEndCharacter = tokens[srcOffset + 2];405const tokenMetadata = tokens[srcOffset + 3];406407if (tokenDeltaLine < startDeltaLine || (tokenDeltaLine === startDeltaLine && tokenEndCharacter <= startCharacter)) {408// 1a. The token is completely before the deletion range409// => nothing to do410newTokenCount++;411continue;412} else if (tokenDeltaLine === startDeltaLine && tokenStartCharacter < startCharacter) {413// 1b, 1c, 1d414// => the token survives, but it needs to shrink415if (tokenDeltaLine === endDeltaLine && tokenEndCharacter > endCharacter) {416// 1d. The token starts before, the deletion range is inside the token417// => the token shrinks by the deletion character count418tokenEndCharacter -= (endCharacter - startCharacter);419} else {420// 1b. The token starts before, the deletion range ends after the token421// 1c. The token starts before, the deletion range ends precisely with the token422// => the token shrinks its ending to the deletion start423tokenEndCharacter = startCharacter;424}425} else if (tokenDeltaLine === startDeltaLine && tokenStartCharacter === startCharacter) {426// 2a, 2b, 2c427if (tokenDeltaLine === endDeltaLine && tokenEndCharacter > endCharacter) {428// 2c. The token starts at the same position, and ends after the deletion range429// => the token shrinks by the deletion character count430tokenEndCharacter -= (endCharacter - startCharacter);431} else {432// 2a. The token starts at the same position, and ends inside the deletion range433// 2b. The token starts at the same position, and ends at the same position as the deletion range434// => the token is deleted435hasDeletedTokens = true;436continue;437}438} else if (tokenDeltaLine < endDeltaLine || (tokenDeltaLine === endDeltaLine && tokenStartCharacter < endCharacter)) {439// 3a, 3b, 3c440if (tokenDeltaLine === endDeltaLine && tokenEndCharacter > endCharacter) {441// 3c. The token starts inside the deletion range, and ends after the deletion range442// => the token moves to continue right after the deletion443tokenDeltaLine = startDeltaLine;444tokenStartCharacter = startCharacter;445tokenEndCharacter = tokenStartCharacter + (tokenEndCharacter - endCharacter);446} else {447// 3a. The token is inside the deletion range448// 3b. The token starts inside the deletion range, and ends at the same position as the deletion range449// => the token is deleted450hasDeletedTokens = true;451continue;452}453} else if (tokenDeltaLine > endDeltaLine) {454// 4. (partial) The token starts after the deletion range, on a line below...455if (deletedLineCount === 0 && !hasDeletedTokens) {456// early stop, there is no need to walk all the tokens and do nothing...457newTokenCount = tokenCount;458break;459}460tokenDeltaLine -= deletedLineCount;461} else if (tokenDeltaLine === endDeltaLine && tokenStartCharacter >= endCharacter) {462// 4. (continued) The token starts after the deletion range, on the last line where a deletion occurs463if (horizontalShiftForFirstLineTokens && tokenDeltaLine === 0) {464tokenStartCharacter += horizontalShiftForFirstLineTokens;465tokenEndCharacter += horizontalShiftForFirstLineTokens;466}467tokenDeltaLine -= deletedLineCount;468tokenStartCharacter -= (endCharacter - startCharacter);469tokenEndCharacter -= (endCharacter - startCharacter);470} else {471throw new Error(`Not possible!`);472}473474const destOffset = 4 * newTokenCount;475tokens[destOffset] = tokenDeltaLine;476tokens[destOffset + 1] = tokenStartCharacter;477tokens[destOffset + 2] = tokenEndCharacter;478tokens[destOffset + 3] = tokenMetadata;479newTokenCount++;480}481482this._tokenCount = newTokenCount;483}484485public acceptInsertText(deltaLine: number, character: number, eolCount: number, firstLineLength: number, lastLineLength: number, firstCharCode: number): void {486// Here are the cases I used to think about this:487//488// 1. The token is completely before the insertion point489// ----------- |490// 2. The token ends precisely at the insertion point491// -----------|492// 3. The token contains the insertion point493// -----|------494// 4. The token starts precisely at the insertion point495// |-----------496// 5. The token is completely after the insertion point497// | -----------498//499const isInsertingPreciselyOneWordCharacter = (500eolCount === 0501&& firstLineLength === 1502&& (503(firstCharCode >= CharCode.Digit0 && firstCharCode <= CharCode.Digit9)504|| (firstCharCode >= CharCode.A && firstCharCode <= CharCode.Z)505|| (firstCharCode >= CharCode.a && firstCharCode <= CharCode.z)506)507);508const tokens = this._tokens;509const tokenCount = this._tokenCount;510for (let i = 0; i < tokenCount; i++) {511const offset = 4 * i;512let tokenDeltaLine = tokens[offset];513let tokenStartCharacter = tokens[offset + 1];514let tokenEndCharacter = tokens[offset + 2];515516if (tokenDeltaLine < deltaLine || (tokenDeltaLine === deltaLine && tokenEndCharacter < character)) {517// 1. The token is completely before the insertion point518// => nothing to do519continue;520} else if (tokenDeltaLine === deltaLine && tokenEndCharacter === character) {521// 2. The token ends precisely at the insertion point522// => expand the end character only if inserting precisely one character that is a word character523if (isInsertingPreciselyOneWordCharacter) {524tokenEndCharacter += 1;525} else {526continue;527}528} else if (tokenDeltaLine === deltaLine && tokenStartCharacter < character && character < tokenEndCharacter) {529// 3. The token contains the insertion point530if (eolCount === 0) {531// => just expand the end character532tokenEndCharacter += firstLineLength;533} else {534// => cut off the token535tokenEndCharacter = character;536}537} else {538// 4. or 5.539if (tokenDeltaLine === deltaLine && tokenStartCharacter === character) {540// 4. The token starts precisely at the insertion point541// => grow the token (by keeping its start constant) only if inserting precisely one character that is a word character542// => otherwise behave as in case 5.543if (isInsertingPreciselyOneWordCharacter) {544continue;545}546}547// => the token must move and keep its size constant548if (tokenDeltaLine === deltaLine) {549tokenDeltaLine += eolCount;550// this token is on the line where the insertion is taking place551if (eolCount === 0) {552tokenStartCharacter += firstLineLength;553tokenEndCharacter += firstLineLength;554} else {555const tokenLength = tokenEndCharacter - tokenStartCharacter;556tokenStartCharacter = lastLineLength + (tokenStartCharacter - character);557tokenEndCharacter = tokenStartCharacter + tokenLength;558}559} else {560tokenDeltaLine += eolCount;561}562}563564tokens[offset] = tokenDeltaLine;565tokens[offset + 1] = tokenStartCharacter;566tokens[offset + 2] = tokenEndCharacter;567}568}569570private static _rateLimiter = new RateLimiter(10 / 60); // limit to 10 times per minute571572public reportIfInvalid(model: ITextModel, startLineNumber: number): void {573for (let i = 0; i < this._tokenCount; i++) {574const lineNumber = this._getDeltaLine(i) + startLineNumber;575576if (lineNumber < 1) {577SparseMultilineTokensStorage._rateLimiter.runIfNotLimited(() => {578console.error('Invalid Semantic Tokens Data From Extension: lineNumber < 1');579});580} else if (lineNumber > model.getLineCount()) {581SparseMultilineTokensStorage._rateLimiter.runIfNotLimited(() => {582console.error('Invalid Semantic Tokens Data From Extension: lineNumber > model.getLineCount()');583});584} else if (this._getEndCharacter(i) > model.getLineLength(lineNumber)) {585SparseMultilineTokensStorage._rateLimiter.runIfNotLimited(() => {586console.error('Invalid Semantic Tokens Data From Extension: end character > model.getLineLength(lineNumber)');587});588}589}590}591}592593export class SparseLineTokens {594595private readonly _tokens: Uint32Array;596597constructor(tokens: Uint32Array) {598this._tokens = tokens;599}600601public getCount(): number {602return this._tokens.length / 4;603}604605public getStartCharacter(tokenIndex: number): number {606return this._tokens[4 * tokenIndex + 1];607}608609public getEndCharacter(tokenIndex: number): number {610return this._tokens[4 * tokenIndex + 2];611}612613public getMetadata(tokenIndex: number): number {614return this._tokens[4 * tokenIndex + 3];615}616}617618619