Path: blob/main/src/vs/editor/common/diff/defaultLinesDiffComputer/linesSliceCharSequence.ts
3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { findLastIdxMonotonous, findLastMonotonous, findFirstMonotonous } from '../../../../base/common/arraysFind.js';
import { CharCode } from '../../../../base/common/charCode.js';
import { OffsetRange } from '../../core/ranges/offsetRange.js';
import { Position } from '../../core/position.js';
import { Range } from '../../core/range.js';
import { ISequence } from './algorithms/diffAlgorithm.js';
import { isSpace } from './utils.js';

/**
 * A character-level sequence built from the part of `lines` that is covered by `range`.
 *
 * Every element is a UTF-16 char code. Consecutive lines are joined by a single `\n` element.
 * When `considerWhitespaceChanges` is `false`, leading and trailing whitespace of every line
 * is dropped from the sequence; the amount of dropped leading whitespace is remembered per
 * line so that offsets can still be translated back to editor positions.
 */
export class LinesSliceCharSequence implements ISequence {
	/** Char codes of the slice, lines separated by a single line feed. */
	private readonly elements: number[] = [];
	/** For every line of the slice: the offset in `elements` of that line's first element. */
	private readonly firstElementOffsetByLineIdx: number[] = [];
	/** For every line of the slice: number of characters cut off at the line start because of `range.startColumn`. */
	private readonly lineStartOffsets: number[] = [];
	/** For every line of the slice: number of leading whitespace characters that were trimmed (0 if whitespace is considered). */
	private readonly trimmedWsLengthsByLineIdx: number[] = [];

	/**
	 * @param lines All lines of the document; `lines[lineNumber - 1]` is read for every line of `range`.
	 * @param range The (1-based) range of `lines` this sequence represents.
	 * @param considerWhitespaceChanges If `false`, leading/trailing whitespace of each line is not part of the sequence.
	 */
	constructor(public readonly lines: string[], private readonly range: Range, public readonly considerWhitespaceChanges: boolean) {
		this.firstElementOffsetByLineIdx.push(0);
		for (let lineNumber = this.range.startLineNumber; lineNumber <= this.range.endLineNumber; lineNumber++) {
			let line = lines[lineNumber - 1];
			let lineStartOffset = 0;
			if (lineNumber === this.range.startLineNumber && this.range.startColumn > 1) {
				// Only the first line can start in the middle of the line.
				lineStartOffset = this.range.startColumn - 1;
				line = line.substring(lineStartOffset);
			}
			this.lineStartOffsets.push(lineStartOffset);

			let trimmedWsLength = 0;
			if (!considerWhitespaceChanges) {
				const trimmedStartLine = line.trimStart();
				trimmedWsLength = line.length - trimmedStartLine.length;
				// Trailing whitespace is dropped as well, but its length does not need to be recorded.
				line = trimmedStartLine.trimEnd();
			}
			this.trimmedWsLengthsByLineIdx.push(trimmedWsLength);

			// On the last line, stop at the end column of the range. The end column is translated into
			// the coordinates of `line`, which already had its start cut off and its whitespace trimmed.
			const lineLength = lineNumber === this.range.endLineNumber
				? Math.min(this.range.endColumn - 1 - lineStartOffset - trimmedWsLength, line.length)
				: line.length;
			for (let i = 0; i < lineLength; i++) {
				this.elements.push(line.charCodeAt(i));
			}

			if (lineNumber < this.range.endLineNumber) {
				this.elements.push('\n'.charCodeAt(0));
				this.firstElementOffsetByLineIdx.push(this.elements.length);
			}
		}
	}

	toString() {
		return `Slice: "${this.text}"`;
	}

	/** The text of the entire sequence. */
	get text(): string {
		return this.getText(new OffsetRange(0, this.length));
	}

	/** Returns the text of the elements in `range` (offsets into this sequence). */
	getText(range: OffsetRange): string {
		return this.elements.slice(range.start, range.endExclusive).map(e => String.fromCharCode(e)).join('');
	}

	/** Returns the char code at `offset`. */
	getElement(offset: number): number {
		return this.elements[offset];
	}

	/** Number of elements in this sequence. */
	get length(): number {
		return this.elements.length;
	}

	/**
	 * Scores the boundary between the elements at `length - 1` and `length`.
	 * Positions before the first and after the last element are treated as `End`.
	 *
	 * @param length Offset of the boundary, i.e. the number of elements before it.
	 * @returns `0` between `\r` and `\n`, `150` directly after a line feed, otherwise a score
	 * derived from the categories of the two neighboring characters.
	 */
	public getBoundaryScore(length: number): number {
		//   a   b   c   ,           d   e   f
		// 11  0   0   12  15  6   13  0   0   11

		const prevCategory = getCategory(length > 0 ? this.elements[length - 1] : -1);
		const nextCategory = getCategory(length < this.elements.length ? this.elements[length] : -1);

		if (prevCategory === CharBoundaryCategory.LineBreakCR && nextCategory === CharBoundaryCategory.LineBreakLF) {
			// don't break between \r and \n
			return 0;
		}
		if (prevCategory === CharBoundaryCategory.LineBreakLF) {
			// prefer the linebreak before the change
			return 150;
		}

		let score = 0;
		if (prevCategory !== nextCategory) {
			score += 10;
			if (prevCategory === CharBoundaryCategory.WordLower && nextCategory === CharBoundaryCategory.WordUpper) {
				// slightly prefer camelCase humps (lower -> Upper)
				score += 1;
			}
		}

		score += getCategoryBoundaryScore(prevCategory);
		score += getCategoryBoundaryScore(nextCategory);

		return score;
	}

	/**
	 * Translates an offset of this sequence into a position in `lines`.
	 *
	 * @param offset Offset into this sequence.
	 * @param preference Only relevant at the first element of a line: with `'left'` the trimmed
	 * leading whitespace is not added to the column, otherwise it is.
	 */
	public translateOffset(offset: number, preference: 'left' | 'right' = 'right'): Position {
		// find the last i such that firstElementOffsetByLineIdx[i] <= offset using binary search
		const i = findLastIdxMonotonous(this.firstElementOffsetByLineIdx, (value) => value <= offset);
		const lineOffset = offset - this.firstElementOffsetByLineIdx[i];
		return new Position(
			this.range.startLineNumber + i,
			1 + this.lineStartOffsets[i] + lineOffset + ((lineOffset === 0 && preference === 'left') ? 0 : this.trimmedWsLengthsByLineIdx[i])
		);
	}

	/**
	 * Translates an offset range of this sequence into a range in `lines`.
	 * The start is translated with preference `'right'`, the end with preference `'left'`.
	 */
	public translateRange(range: OffsetRange): Range {
		const pos1 = this.translateOffset(range.start, 'right');
		const pos2 = this.translateOffset(range.endExclusive, 'left');
		if (pos2.isBefore(pos1)) {
			// The different preferences can put the end before the start (at a line start with
			// trimmed whitespace); collapse to an empty range at the end position in that case.
			return Range.fromPositions(pos2, pos2);
		}
		return Range.fromPositions(pos1, pos2);
	}

	/**
	 * Finds the word that contains the character at the given offset
	 *
	 * @returns The offset range of the word, or `undefined` if `offset` is out of bounds or
	 * the character at `offset` is not a word character.
	 */
	public findWordContaining(offset: number): OffsetRange | undefined {
		if (offset < 0 || offset >= this.elements.length) {
			return undefined;
		}

		if (!isWordChar(this.elements[offset])) {
			return undefined;
		}

		// find start
		let start = offset;
		while (start > 0 && isWordChar(this.elements[start - 1])) {
			start--;
		}

		// find end
		let end = offset;
		while (end < this.elements.length && isWordChar(this.elements[end])) {
			end++;
		}

		return new OffsetRange(start, end);
	}

	/**
	 * fooBar has the two sub-words foo and bar
	 *
	 * A sub-word starts at an upper case letter (or at the start of the word) and ends before
	 * the next upper case letter (or at the end of the word).
	 *
	 * @returns The offset range of the sub-word containing `offset`, or `undefined` if `offset`
	 * is out of bounds or the character at `offset` is not a word character.
	 */
	public findSubWordContaining(offset: number): OffsetRange | undefined {
		if (offset < 0 || offset >= this.elements.length) {
			return undefined;
		}

		if (!isWordChar(this.elements[offset])) {
			return undefined;
		}

		// find start: walk left until the current character is upper case (it begins the sub-word)
		let start = offset;
		while (start > 0 && isWordChar(this.elements[start - 1]) && !isUpperCase(this.elements[start])) {
			start--;
		}

		// find end: the character at `offset` always belongs to the sub-word, so scanning starts
		// behind it. Starting at `offset` itself would yield an empty range whenever `offset`
		// points at an upper case letter.
		let end = offset + 1;
		while (end < this.elements.length && isWordChar(this.elements[end]) && !isUpperCase(this.elements[end])) {
			end++;
		}

		return new OffsetRange(start, end);
	}

	/** Number of line breaks between the start and the (exclusive) end of `range`. */
	public countLinesIn(range: OffsetRange): number {
		return this.translateOffset(range.endExclusive).lineNumber - this.translateOffset(range.start).lineNumber;
	}

	/** Whether the elements at the two offsets are the same char code. */
	public isStronglyEqual(offset1: number, offset2: number): boolean {
		return this.elements[offset1] === this.elements[offset2];
	}

	/**
	 * Extends `range` to line boundaries: the start moves back to the last recorded line start
	 * that is `<= range.start` (or 0), the end moves forward to the first recorded line start
	 * that is `>= range.endExclusive` (or the end of the sequence).
	 */
	public extendToFullLines(range: OffsetRange): OffsetRange {
		const start = findLastMonotonous(this.firstElementOffsetByLineIdx, x => x <= range.start) ?? 0;
		const end = findFirstMonotonous(this.firstElementOffsetByLineIdx, x => range.endExclusive <= x) ?? this.elements.length;
		return new OffsetRange(start, end);
	}
}

/** Whether `charCode` is an ASCII letter or an ASCII digit. */
function isWordChar(charCode: number): boolean {
	return charCode >= CharCode.a && charCode <= CharCode.z
		|| charCode >= CharCode.A && charCode <= CharCode.Z
		|| charCode >= CharCode.Digit0 && charCode <= CharCode.Digit9;
}

/** Whether `charCode` is an ASCII upper case letter. */
function isUpperCase(charCode: number): boolean {
	return charCode >= CharCode.A && charCode <= CharCode.Z;
}

/** Character classes used to score boundaries between two neighboring characters. */
const enum CharBoundaryCategory {
	WordLower,
	WordUpper,
	WordNumber,
	End,
	Other,
	Separator,
	Space,
	LineBreakCR,
	LineBreakLF,
}

/** Score contribution of a character category when it is adjacent to a boundary. */
const score: Record<CharBoundaryCategory, number> = {
	[CharBoundaryCategory.WordLower]: 0,
	[CharBoundaryCategory.WordUpper]: 0,
	[CharBoundaryCategory.WordNumber]: 0,
	[CharBoundaryCategory.End]: 10,
	[CharBoundaryCategory.Other]: 2,
	[CharBoundaryCategory.Separator]: 30,
	[CharBoundaryCategory.Space]: 3,
	[CharBoundaryCategory.LineBreakCR]: 10,
	[CharBoundaryCategory.LineBreakLF]: 10,
};

/** Looks up the boundary score contribution of `category`. */
function getCategoryBoundaryScore(category: CharBoundaryCategory): number {
	return score[category];
}

/**
 * Classifies a char code. `-1` stands for "no character" (before the start / after the end
 * of the sequence) and maps to `End`.
 */
function getCategory(charCode: number): CharBoundaryCategory {
	if (charCode === CharCode.LineFeed) {
		return CharBoundaryCategory.LineBreakLF;
	} else if (charCode === CharCode.CarriageReturn) {
		return CharBoundaryCategory.LineBreakCR;
	} else if (isSpace(charCode)) {
		return CharBoundaryCategory.Space;
	} else if (charCode >= CharCode.a && charCode <= CharCode.z) {
		return CharBoundaryCategory.WordLower;
	} else if (charCode >= CharCode.A && charCode <= CharCode.Z) {
		return CharBoundaryCategory.WordUpper;
	} else if (charCode >= CharCode.Digit0 && charCode <= CharCode.Digit9) {
		return CharBoundaryCategory.WordNumber;
	} else if (charCode === -1) {
		return CharBoundaryCategory.End;
	} else if (charCode === CharCode.Comma || charCode === CharCode.Semicolon) {
		return CharBoundaryCategory.Separator;
	} else {
		return CharBoundaryCategory.Other;
	}
}