Path: blob/main/src/vs/editor/common/tokens/sparseMultilineTokens.ts
3296 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { CharCode } from '../../../base/common/charCode.js';6import { Position } from '../core/position.js';7import { IRange, Range } from '../core/range.js';8import { countEOL } from '../core/misc/eolCounter.js';9import { ITextModel } from '../model.js';10import { RateLimiter } from './common.js';1112/**13* Represents sparse tokens over a contiguous range of lines.14*/15export class SparseMultilineTokens {1617public static create(startLineNumber: number, tokens: Uint32Array): SparseMultilineTokens {18return new SparseMultilineTokens(startLineNumber, new SparseMultilineTokensStorage(tokens));19}2021private _startLineNumber: number;22private _endLineNumber: number;23private readonly _tokens: SparseMultilineTokensStorage;2425/**26* (Inclusive) start line number for these tokens.27*/28public get startLineNumber(): number {29return this._startLineNumber;30}3132/**33* (Inclusive) end line number for these tokens.34*/35public get endLineNumber(): number {36return this._endLineNumber;37}3839private constructor(startLineNumber: number, tokens: SparseMultilineTokensStorage) {40this._startLineNumber = startLineNumber;41this._tokens = tokens;42this._endLineNumber = this._startLineNumber + this._tokens.getMaxDeltaLine();43}4445public toString(): string {46return this._tokens.toString(this._startLineNumber);47}4849private _updateEndLineNumber(): void {50this._endLineNumber = this._startLineNumber + this._tokens.getMaxDeltaLine();51}5253public isEmpty(): boolean {54return this._tokens.isEmpty();55}5657public getLineTokens(lineNumber: number): SparseLineTokens | null {58if (this._startLineNumber <= lineNumber && lineNumber <= this._endLineNumber) {59return this._tokens.getLineTokens(lineNumber - this._startLineNumber);60}61return null;62}6364public getRange(): Range | null {65const deltaRange = this._tokens.getRange();66if (!deltaRange) {67return deltaRange;68}69return new Range(this._startLineNumber + deltaRange.startLineNumber, deltaRange.startColumn, this._startLineNumber + deltaRange.endLineNumber, deltaRange.endColumn);70}7172public removeTokens(range: Range): void {73const startLineIndex = range.startLineNumber - this._startLineNumber;74const endLineIndex = range.endLineNumber - this._startLineNumber;7576this._startLineNumber += this._tokens.removeTokens(startLineIndex, range.startColumn - 1, endLineIndex, range.endColumn - 1);77this._updateEndLineNumber();78}7980public split(range: Range): [SparseMultilineTokens, SparseMultilineTokens] {81// split tokens to two:82// a) all the tokens before `range`83// b) all the tokens after `range`84const startLineIndex = range.startLineNumber - this._startLineNumber;85const endLineIndex = range.endLineNumber - this._startLineNumber;8687const [a, b, bDeltaLine] = this._tokens.split(startLineIndex, range.startColumn - 1, endLineIndex, range.endColumn - 1);88return [new SparseMultilineTokens(this._startLineNumber, a), new SparseMultilineTokens(this._startLineNumber + bDeltaLine, b)];89}9091public applyEdit(range: IRange, text: string): void {92const [eolCount, firstLineLength, lastLineLength] = countEOL(text);93this.acceptEdit(range, eolCount, firstLineLength, lastLineLength, text.length > 0 ? text.charCodeAt(0) : CharCode.Null);94}9596public acceptEdit(range: IRange, eolCount: number, firstLineLength: number, lastLineLength: number, firstCharCode: number): void {97this._acceptDeleteRange(range);98this._acceptInsertText(new Position(range.startLineNumber, range.startColumn), eolCount, firstLineLength, lastLineLength, firstCharCode);99this._updateEndLineNumber();100}101102private _acceptDeleteRange(range: IRange): void {103if (range.startLineNumber === range.endLineNumber && range.startColumn === range.endColumn) {104// Nothing to delete105return;106}107108const firstLineIndex = range.startLineNumber - this._startLineNumber;109const lastLineIndex = range.endLineNumber - this._startLineNumber;110111if (lastLineIndex < 0) {112// this deletion occurs entirely before this block, so we only need to adjust line numbers113const deletedLinesCount = lastLineIndex - firstLineIndex;114this._startLineNumber -= deletedLinesCount;115return;116}117118const tokenMaxDeltaLine = this._tokens.getMaxDeltaLine();119120if (firstLineIndex >= tokenMaxDeltaLine + 1) {121// this deletion occurs entirely after this block, so there is nothing to do122return;123}124125if (firstLineIndex < 0 && lastLineIndex >= tokenMaxDeltaLine + 1) {126// this deletion completely encompasses this block127this._startLineNumber = 0;128this._tokens.clear();129return;130}131132if (firstLineIndex < 0) {133const deletedBefore = -firstLineIndex;134this._startLineNumber -= deletedBefore;135136this._tokens.acceptDeleteRange(range.startColumn - 1, 0, 0, lastLineIndex, range.endColumn - 1);137} else {138this._tokens.acceptDeleteRange(0, firstLineIndex, range.startColumn - 1, lastLineIndex, range.endColumn - 1);139}140}141142private _acceptInsertText(position: Position, eolCount: number, firstLineLength: number, lastLineLength: number, firstCharCode: number): void {143144if (eolCount === 0 && firstLineLength === 0) {145// Nothing to insert146return;147}148149const lineIndex = position.lineNumber - this._startLineNumber;150151if (lineIndex < 0) {152// this insertion occurs before this block, so we only need to adjust line numbers153this._startLineNumber += eolCount;154return;155}156157const tokenMaxDeltaLine = this._tokens.getMaxDeltaLine();158159if (lineIndex >= tokenMaxDeltaLine + 1) {160// this insertion occurs after this block, so there is nothing to do161return;162}163164this._tokens.acceptInsertText(lineIndex, position.column - 1, eolCount, firstLineLength, lastLineLength, firstCharCode);165}166167public reportIfInvalid(model: ITextModel): void {168this._tokens.reportIfInvalid(model, this._startLineNumber);169}170}171172class SparseMultilineTokensStorage {173/**174* The encoding of tokens is:175* 4*i deltaLine (from `startLineNumber`)176* 4*i+1 startCharacter (from the line start)177* 4*i+2 endCharacter (from the line start)178* 4*i+3 metadata179*/180private readonly _tokens: Uint32Array;181private _tokenCount: number;182183constructor(tokens: Uint32Array) {184this._tokens = tokens;185this._tokenCount = tokens.length / 4;186}187188public toString(startLineNumber: number): string {189const pieces: string[] = [];190for (let i = 0; i < this._tokenCount; i++) {191pieces.push(`(${this._getDeltaLine(i) + startLineNumber},${this._getStartCharacter(i)}-${this._getEndCharacter(i)})`);192}193return `[${pieces.join(',')}]`;194}195196public getMaxDeltaLine(): number {197const tokenCount = this._getTokenCount();198if (tokenCount === 0) {199return -1;200}201return this._getDeltaLine(tokenCount - 1);202}203204public getRange(): Range | null {205const tokenCount = this._getTokenCount();206if (tokenCount === 0) {207return null;208}209const startChar = this._getStartCharacter(0);210const maxDeltaLine = this._getDeltaLine(tokenCount - 1);211const endChar = this._getEndCharacter(tokenCount - 1);212return new Range(0, startChar + 1, maxDeltaLine, endChar + 1);213}214215private _getTokenCount(): number {216return this._tokenCount;217}218219private _getDeltaLine(tokenIndex: number): number {220return this._tokens[4 * tokenIndex];221}222223private _getStartCharacter(tokenIndex: number): number {224return this._tokens[4 * tokenIndex + 1];225}226227private _getEndCharacter(tokenIndex: number): number {228return this._tokens[4 * tokenIndex + 2];229}230231public isEmpty(): boolean {232return (this._getTokenCount() === 0);233}234235public getLineTokens(deltaLine: number): SparseLineTokens | null {236let low = 0;237let high = this._getTokenCount() - 1;238239while (low < high) {240const mid = low + Math.floor((high - low) / 2);241const midDeltaLine = this._getDeltaLine(mid);242243if (midDeltaLine < deltaLine) {244low = mid + 1;245} else if (midDeltaLine > deltaLine) {246high = mid - 1;247} else {248let min = mid;249while (min > low && this._getDeltaLine(min - 1) === deltaLine) {250min--;251}252let max = mid;253while (max < high && this._getDeltaLine(max + 1) === deltaLine) {254max++;255}256return new SparseLineTokens(this._tokens.subarray(4 * min, 4 * max + 4));257}258}259260if (this._getDeltaLine(low) === deltaLine) {261return new SparseLineTokens(this._tokens.subarray(4 * low, 4 * low + 4));262}263264return null;265}266267public clear(): void {268this._tokenCount = 0;269}270271public removeTokens(startDeltaLine: number, startChar: number, endDeltaLine: number, endChar: number): number {272const tokens = this._tokens;273const tokenCount = this._tokenCount;274let newTokenCount = 0;275let hasDeletedTokens = false;276let firstDeltaLine = 0;277for (let i = 0; i < tokenCount; i++) {278const srcOffset = 4 * i;279const tokenDeltaLine = tokens[srcOffset];280const tokenStartCharacter = tokens[srcOffset + 1];281const tokenEndCharacter = tokens[srcOffset + 2];282const tokenMetadata = tokens[srcOffset + 3];283284if (285(tokenDeltaLine > startDeltaLine || (tokenDeltaLine === startDeltaLine && tokenEndCharacter >= startChar))286&& (tokenDeltaLine < endDeltaLine || (tokenDeltaLine === endDeltaLine && tokenStartCharacter <= endChar))287) {288hasDeletedTokens = true;289} else {290if (newTokenCount === 0) {291firstDeltaLine = tokenDeltaLine;292}293if (hasDeletedTokens) {294// must move the token to the left295const destOffset = 4 * newTokenCount;296tokens[destOffset] = tokenDeltaLine - firstDeltaLine;297tokens[destOffset + 1] = tokenStartCharacter;298tokens[destOffset + 2] = tokenEndCharacter;299tokens[destOffset + 3] = tokenMetadata;300}301newTokenCount++;302}303}304305this._tokenCount = newTokenCount;306307return firstDeltaLine;308}309310public split(startDeltaLine: number, startChar: number, endDeltaLine: number, endChar: number): [SparseMultilineTokensStorage, SparseMultilineTokensStorage, number] {311const tokens = this._tokens;312const tokenCount = this._tokenCount;313const aTokens: number[] = [];314const bTokens: number[] = [];315let destTokens: number[] = aTokens;316let destOffset = 0;317let destFirstDeltaLine: number = 0;318for (let i = 0; i < tokenCount; i++) {319const srcOffset = 4 * i;320const tokenDeltaLine = tokens[srcOffset];321const tokenStartCharacter = tokens[srcOffset + 1];322const tokenEndCharacter = tokens[srcOffset + 2];323const tokenMetadata = tokens[srcOffset + 3];324325if ((tokenDeltaLine > startDeltaLine || (tokenDeltaLine === startDeltaLine && tokenEndCharacter >= startChar))) {326if ((tokenDeltaLine < endDeltaLine || (tokenDeltaLine === endDeltaLine && tokenStartCharacter <= endChar))) {327// this token is touching the range328continue;329} else {330// this token is after the range331if (destTokens !== bTokens) {332// this token is the first token after the range333destTokens = bTokens;334destOffset = 0;335destFirstDeltaLine = tokenDeltaLine;336}337}338}339340destTokens[destOffset++] = tokenDeltaLine - destFirstDeltaLine;341destTokens[destOffset++] = tokenStartCharacter;342destTokens[destOffset++] = tokenEndCharacter;343destTokens[destOffset++] = tokenMetadata;344}345346return [new SparseMultilineTokensStorage(new Uint32Array(aTokens)), new SparseMultilineTokensStorage(new Uint32Array(bTokens)), destFirstDeltaLine];347}348349public acceptDeleteRange(horizontalShiftForFirstLineTokens: number, startDeltaLine: number, startCharacter: number, endDeltaLine: number, endCharacter: number): void {350// This is a bit complex, here are the cases I used to think about this:351//352// 1. The token starts before the deletion range353// 1a. The token is completely before the deletion range354// -----------355// xxxxxxxxxxx356// 1b. The token starts before, the deletion range ends after the token357// -----------358// xxxxxxxxxxx359// 1c. The token starts before, the deletion range ends precisely with the token360// ---------------361// xxxxxxxx362// 1d. The token starts before, the deletion range is inside the token363// ---------------364// xxxxx365//366// 2. The token starts at the same position with the deletion range367// 2a. The token starts at the same position, and ends inside the deletion range368// -------369// xxxxxxxxxxx370// 2b. The token starts at the same position, and ends at the same position as the deletion range371// ----------372// xxxxxxxxxx373// 2c. The token starts at the same position, and ends after the deletion range374// -------------375// xxxxxxx376//377// 3. The token starts inside the deletion range378// 3a. The token is inside the deletion range379// -------380// xxxxxxxxxxxxx381// 3b. The token starts inside the deletion range, and ends at the same position as the deletion range382// ----------383// xxxxxxxxxxxxx384// 3c. The token starts inside the deletion range, and ends after the deletion range385// ------------386// xxxxxxxxxxx387//388// 4. The token starts after the deletion range389// -----------390// xxxxxxxx391//392const tokens = this._tokens;393const tokenCount = this._tokenCount;394const deletedLineCount = (endDeltaLine - startDeltaLine);395let newTokenCount = 0;396let hasDeletedTokens = false;397for (let i = 0; i < tokenCount; i++) {398const srcOffset = 4 * i;399let tokenDeltaLine = tokens[srcOffset];400let tokenStartCharacter = tokens[srcOffset + 1];401let tokenEndCharacter = tokens[srcOffset + 2];402const tokenMetadata = tokens[srcOffset + 3];403404if (tokenDeltaLine < startDeltaLine || (tokenDeltaLine === startDeltaLine && tokenEndCharacter <= startCharacter)) {405// 1a. The token is completely before the deletion range406// => nothing to do407newTokenCount++;408continue;409} else if (tokenDeltaLine === startDeltaLine && tokenStartCharacter < startCharacter) {410// 1b, 1c, 1d411// => the token survives, but it needs to shrink412if (tokenDeltaLine === endDeltaLine && tokenEndCharacter > endCharacter) {413// 1d. The token starts before, the deletion range is inside the token414// => the token shrinks by the deletion character count415tokenEndCharacter -= (endCharacter - startCharacter);416} else {417// 1b. The token starts before, the deletion range ends after the token418// 1c. The token starts before, the deletion range ends precisely with the token419// => the token shrinks its ending to the deletion start420tokenEndCharacter = startCharacter;421}422} else if (tokenDeltaLine === startDeltaLine && tokenStartCharacter === startCharacter) {423// 2a, 2b, 2c424if (tokenDeltaLine === endDeltaLine && tokenEndCharacter > endCharacter) {425// 2c. The token starts at the same position, and ends after the deletion range426// => the token shrinks by the deletion character count427tokenEndCharacter -= (endCharacter - startCharacter);428} else {429// 2a. The token starts at the same position, and ends inside the deletion range430// 2b. The token starts at the same position, and ends at the same position as the deletion range431// => the token is deleted432hasDeletedTokens = true;433continue;434}435} else if (tokenDeltaLine < endDeltaLine || (tokenDeltaLine === endDeltaLine && tokenStartCharacter < endCharacter)) {436// 3a, 3b, 3c437if (tokenDeltaLine === endDeltaLine && tokenEndCharacter > endCharacter) {438// 3c. The token starts inside the deletion range, and ends after the deletion range439// => the token moves to continue right after the deletion440tokenDeltaLine = startDeltaLine;441tokenStartCharacter = startCharacter;442tokenEndCharacter = tokenStartCharacter + (tokenEndCharacter - endCharacter);443} else {444// 3a. The token is inside the deletion range445// 3b. The token starts inside the deletion range, and ends at the same position as the deletion range446// => the token is deleted447hasDeletedTokens = true;448continue;449}450} else if (tokenDeltaLine > endDeltaLine) {451// 4. (partial) The token starts after the deletion range, on a line below...452if (deletedLineCount === 0 && !hasDeletedTokens) {453// early stop, there is no need to walk all the tokens and do nothing...454newTokenCount = tokenCount;455break;456}457tokenDeltaLine -= deletedLineCount;458} else if (tokenDeltaLine === endDeltaLine && tokenStartCharacter >= endCharacter) {459// 4. (continued) The token starts after the deletion range, on the last line where a deletion occurs460if (horizontalShiftForFirstLineTokens && tokenDeltaLine === 0) {461tokenStartCharacter += horizontalShiftForFirstLineTokens;462tokenEndCharacter += horizontalShiftForFirstLineTokens;463}464tokenDeltaLine -= deletedLineCount;465tokenStartCharacter -= (endCharacter - startCharacter);466tokenEndCharacter -= (endCharacter - startCharacter);467} else {468throw new Error(`Not possible!`);469}470471const destOffset = 4 * newTokenCount;472tokens[destOffset] = tokenDeltaLine;473tokens[destOffset + 1] = tokenStartCharacter;474tokens[destOffset + 2] = tokenEndCharacter;475tokens[destOffset + 3] = tokenMetadata;476newTokenCount++;477}478479this._tokenCount = newTokenCount;480}481482public acceptInsertText(deltaLine: number, character: number, eolCount: number, firstLineLength: number, lastLineLength: number, firstCharCode: number): void {483// Here are the cases I used to think about this:484//485// 1. The token is completely before the insertion point486// ----------- |487// 2. The token ends precisely at the insertion point488// -----------|489// 3. The token contains the insertion point490// -----|------491// 4. The token starts precisely at the insertion point492// |-----------493// 5. The token is completely after the insertion point494// | -----------495//496const isInsertingPreciselyOneWordCharacter = (497eolCount === 0498&& firstLineLength === 1499&& (500(firstCharCode >= CharCode.Digit0 && firstCharCode <= CharCode.Digit9)501|| (firstCharCode >= CharCode.A && firstCharCode <= CharCode.Z)502|| (firstCharCode >= CharCode.a && firstCharCode <= CharCode.z)503)504);505const tokens = this._tokens;506const tokenCount = this._tokenCount;507for (let i = 0; i < tokenCount; i++) {508const offset = 4 * i;509let tokenDeltaLine = tokens[offset];510let tokenStartCharacter = tokens[offset + 1];511let tokenEndCharacter = tokens[offset + 2];512513if (tokenDeltaLine < deltaLine || (tokenDeltaLine === deltaLine && tokenEndCharacter < character)) {514// 1. The token is completely before the insertion point515// => nothing to do516continue;517} else if (tokenDeltaLine === deltaLine && tokenEndCharacter === character) {518// 2. The token ends precisely at the insertion point519// => expand the end character only if inserting precisely one character that is a word character520if (isInsertingPreciselyOneWordCharacter) {521tokenEndCharacter += 1;522} else {523continue;524}525} else if (tokenDeltaLine === deltaLine && tokenStartCharacter < character && character < tokenEndCharacter) {526// 3. The token contains the insertion point527if (eolCount === 0) {528// => just expand the end character529tokenEndCharacter += firstLineLength;530} else {531// => cut off the token532tokenEndCharacter = character;533}534} else {535// 4. or 5.536if (tokenDeltaLine === deltaLine && tokenStartCharacter === character) {537// 4. The token starts precisely at the insertion point538// => grow the token (by keeping its start constant) only if inserting precisely one character that is a word character539// => otherwise behave as in case 5.540if (isInsertingPreciselyOneWordCharacter) {541continue;542}543}544// => the token must move and keep its size constant545if (tokenDeltaLine === deltaLine) {546tokenDeltaLine += eolCount;547// this token is on the line where the insertion is taking place548if (eolCount === 0) {549tokenStartCharacter += firstLineLength;550tokenEndCharacter += firstLineLength;551} else {552const tokenLength = tokenEndCharacter - tokenStartCharacter;553tokenStartCharacter = lastLineLength + (tokenStartCharacter - character);554tokenEndCharacter = tokenStartCharacter + tokenLength;555}556} else {557tokenDeltaLine += eolCount;558}559}560561tokens[offset] = tokenDeltaLine;562tokens[offset + 1] = tokenStartCharacter;563tokens[offset + 2] = tokenEndCharacter;564}565}566567private static _rateLimiter = new RateLimiter(10 / 60); // limit to 10 times per minute568569public reportIfInvalid(model: ITextModel, startLineNumber: number): void {570for (let i = 0; i < this._tokenCount; i++) {571const lineNumber = this._getDeltaLine(i) + startLineNumber;572573if (lineNumber < 1) {574SparseMultilineTokensStorage._rateLimiter.runIfNotLimited(() => {575console.error('Invalid Semantic Tokens Data From Extension: lineNumber < 1');576});577} else if (lineNumber > model.getLineCount()) {578SparseMultilineTokensStorage._rateLimiter.runIfNotLimited(() => {579console.error('Invalid Semantic Tokens Data From Extension: lineNumber > model.getLineCount()');580});581} else if (this._getEndCharacter(i) > model.getLineLength(lineNumber)) {582SparseMultilineTokensStorage._rateLimiter.runIfNotLimited(() => {583console.error('Invalid Semantic Tokens Data From Extension: end character > model.getLineLength(lineNumber)');584});585}586}587}588}589590export class SparseLineTokens {591592private readonly _tokens: Uint32Array;593594constructor(tokens: Uint32Array) {595this._tokens = tokens;596}597598public getCount(): number {599return this._tokens.length / 4;600}601602public getStartCharacter(tokenIndex: number): number {603return this._tokens[4 * tokenIndex + 1];604}605606public getEndCharacter(tokenIndex: number): number {607return this._tokens[4 * tokenIndex + 2];608}609610public getMetadata(tokenIndex: number): number {611return this._tokens[4 * tokenIndex + 3];612}613}614615616