Path: blob/main/src/vs/editor/common/languages/linkComputer.ts
3294 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { CharCode } from '../../../base/common/charCode.js';
import { CharacterClassifier } from '../core/characterClassifier.js';
import { ILink } from '../languages.js';

/**
 * Minimal view of a text document needed to compute links.
 * `computeLinks` requests lines `1..getLineCount()` (1-based, inclusive).
 */
export interface ILinkComputerTarget {
	getLineCount(): number;
	getLineContent(lineNumber: number): string;
}

/**
 * States of the scheme-detection state machine.
 *
 * `Start` through `AlmostThere` track progress through `http://`, `https://` or `file://`.
 * `End` is reached after the second `/`; `Accept` means at least one link character followed it.
 */
export const enum State {
	Invalid = 0,
	Start = 1,
	H = 2,
	HT = 3,
	HTT = 4,
	HTTP = 5,
	F = 6,
	FI = 7,
	FIL = 8,
	BeforeColon = 9,
	AfterColon = 10,
	AlmostThere = 11,
	End = 12,
	Accept = 13,
	LastKnownState = 14 // marker, custom states may follow
}

/** A transition `[from, charCode, to]` of the state machine. */
export type Edge = [State, number, State];

/**
 * Dense `rows x cols` matrix of bytes stored row-major in a single `Uint8Array`.
 * No bounds checking is performed by `get` / `set`.
 */
class Uint8Matrix {

	private readonly _data: Uint8Array;
	public readonly rows: number;
	public readonly cols: number;

	/**
	 * @param rows Number of rows.
	 * @param cols Number of columns.
	 * @param defaultValue Value every cell is initialized to.
	 */
	constructor(rows: number, cols: number, defaultValue: number) {
		this._data = new Uint8Array(rows * cols).fill(defaultValue);
		this.rows = rows;
		this.cols = cols;
	}

	public get(row: number, col: number): number {
		return this._data[row * this.cols + col];
	}

	public set(row: number, col: number, value: number): void {
		this._data[row * this.cols + col] = value;
	}
}

/**
 * Table-driven state machine built from a list of edges.
 * Any `(state, charCode)` pair without an explicit edge maps to `State.Invalid`.
 */
export class StateMachine {

	private readonly _states: Uint8Matrix;
	private readonly _maxCharCode: number;

	/**
	 * @param edges Transitions of the machine. The table is sized to hold the largest
	 * state and the largest character code that appear in `edges`.
	 */
	constructor(edges: Edge[]) {
		let maxCharCode = 0;
		let maxState = State.Invalid;
		for (let i = 0, len = edges.length; i < len; i++) {
			const [from, chCode, to] = edges[i];
			if (chCode > maxCharCode) {
				maxCharCode = chCode;
			}
			if (from > maxState) {
				maxState = from;
			}
			if (to > maxState) {
				maxState = to;
			}
		}

		// Turn the largest indices into exclusive upper bounds (table dimensions).
		maxCharCode++;
		maxState++;

		const states = new Uint8Matrix(maxState, maxCharCode, State.Invalid);
		for (let i = 0, len = edges.length; i < len; i++) {
			const [from, chCode, to] = edges[i];
			states.set(from, chCode, to);
		}

		this._states = states;
		this._maxCharCode = maxCharCode;
	}

	/**
	 * Returns the state reached from `currentState` on `chCode`.
	 * Character codes outside the table yield `State.Invalid`.
	 */
	public nextState(currentState: State, chCode: number): State {
		if (chCode < 0 || chCode >= this._maxCharCode) {
			return State.Invalid;
		}
		return this._states.get(currentState, chCode);
	}
}

// State machine for http:// or https:// or file://
let _stateMachine: StateMachine | null = null;

/**
 * Returns the shared state machine, creating it on first use.
 * Scheme letters are matched in both lower and upper case.
 */
function getStateMachine(): StateMachine {
	if (_stateMachine === null) {
		_stateMachine = new StateMachine([
			[State.Start, CharCode.h, State.H],
			[State.Start, CharCode.H, State.H],
			[State.Start, CharCode.f, State.F],
			[State.Start, CharCode.F, State.F],

			[State.H, CharCode.t, State.HT],
			[State.H, CharCode.T, State.HT],

			[State.HT, CharCode.t, State.HTT],
			[State.HT, CharCode.T, State.HTT],

			[State.HTT, CharCode.p, State.HTTP],
			[State.HTT, CharCode.P, State.HTTP],

			[State.HTTP, CharCode.s, State.BeforeColon],
			[State.HTTP, CharCode.S, State.BeforeColon],
			[State.HTTP, CharCode.Colon, State.AfterColon],

			[State.F, CharCode.i, State.FI],
			[State.F, CharCode.I, State.FI],

			[State.FI, CharCode.l, State.FIL],
			[State.FI, CharCode.L, State.FIL],

			[State.FIL, CharCode.e, State.BeforeColon],
			[State.FIL, CharCode.E, State.BeforeColon],

			[State.BeforeColon, CharCode.Colon, State.AfterColon],

			[State.AfterColon, CharCode.Slash, State.AlmostThere],

			[State.AlmostThere, CharCode.Slash, State.End],
		]);
	}
	return _stateMachine;
}


/** How a character affects a link that is currently being scanned. */
const enum CharacterClass {
	/** The character may be part of a link. */
	None = 0,
	/** The character ends the link being scanned. */
	ForceTermination = 1,
	/** The character may appear inside a link but is trimmed from its end. */
	CannotEndIn = 2
}

let _classifier: CharacterClassifier<CharacterClass> | null = null;

/**
 * Returns the shared character classifier, creating it on first use.
 * `CannotEndIn` is assigned after `ForceTermination`, so a character that is
 * listed in both strings ends up classified as `CannotEndIn`.
 */
function getClassifier(): CharacterClassifier<CharacterClass> {
	if (_classifier === null) {
		_classifier = new CharacterClassifier<CharacterClass>(CharacterClass.None);

		// allow-any-unicode-next-line
		const FORCE_TERMINATION_CHARACTERS = ' \t<>\'\"、。。、,.:;‘〈「『〔([{「」}])〕』」〉’`~…|';
		for (let i = 0; i < FORCE_TERMINATION_CHARACTERS.length; i++) {
			_classifier.set(FORCE_TERMINATION_CHARACTERS.charCodeAt(i), CharacterClass.ForceTermination);
		}

		const CANNOT_END_WITH_CHARACTERS = '.,;:';
		for (let i = 0; i < CANNOT_END_WITH_CHARACTERS.length; i++) {
			_classifier.set(CANNOT_END_WITH_CHARACTERS.charCodeAt(i), CharacterClass.CannotEndIn);
		}
	}
	return _classifier;
}

/** Scans the lines of a document for `http://`, `https://` and `file://` links. */
export class LinkComputer {

	/**
	 * Builds the `ILink` for the candidate `line[linkBeginIndex, linkEndIndex)`.
	 *
	 * Trailing `CannotEndIn` characters are dropped, and a closing `)`, `]` or `}` is dropped
	 * when the matching opening character sits immediately before the link.
	 *
	 * @param classifier Classifier used to detect characters a link cannot end in.
	 * @param line Content of the line containing the link.
	 * @param lineNumber Line number reported in the resulting range.
	 * @param linkBeginIndex 0-based index of the first character of the link.
	 * @param linkEndIndex 0-based index one past the last candidate character.
	 * @returns The link; range columns are `index + 1` and `endColumn` is one past the last included character.
	 */
	private static _createLink(classifier: CharacterClassifier<CharacterClass>, line: string, lineNumber: number, linkBeginIndex: number, linkEndIndex: number): ILink {
		// Do not allow to end link in certain characters...
		let lastIncludedCharIndex = linkEndIndex - 1;
		do {
			const chCode = line.charCodeAt(lastIncludedCharIndex);
			const chClass = classifier.get(chCode);
			if (chClass !== CharacterClass.CannotEndIn) {
				break;
			}
			lastIncludedCharIndex--;
		} while (lastIncludedCharIndex > linkBeginIndex);

		// Handle links enclosed in parens, square brackets and curlys.
		if (linkBeginIndex > 0) {
			const charCodeBeforeLink = line.charCodeAt(linkBeginIndex - 1);
			const lastCharCodeInLink = line.charCodeAt(lastIncludedCharIndex);

			if (
				(charCodeBeforeLink === CharCode.OpenParen && lastCharCodeInLink === CharCode.CloseParen)
				|| (charCodeBeforeLink === CharCode.OpenSquareBracket && lastCharCodeInLink === CharCode.CloseSquareBracket)
				|| (charCodeBeforeLink === CharCode.OpenCurlyBrace && lastCharCodeInLink === CharCode.CloseCurlyBrace)
			) {
				// Do not end in ) if ( is before the link start
				// Do not end in ] if [ is before the link start
				// Do not end in } if { is before the link start
				lastIncludedCharIndex--;
			}
		}

		return {
			range: {
				startLineNumber: lineNumber,
				startColumn: linkBeginIndex + 1,
				endLineNumber: lineNumber,
				endColumn: lastIncludedCharIndex + 2
			},
			url: line.substring(linkBeginIndex, lastIncludedCharIndex + 1)
		};
	}

	/**
	 * Computes all links in `model`, line by line.
	 *
	 * Each line is scanned once. The state machine recognizes the scheme prefix; once it
	 * reaches `State.End` the next character either aborts the candidate (force-terminating
	 * character) or moves the scan to `State.Accept`. In `State.Accept` characters are
	 * consumed until one force-terminates the link or the line ends.
	 *
	 * @param model Document to scan.
	 * @param stateMachine Machine recognizing the link prefix; defaults to the shared
	 * `http://` / `https://` / `file://` machine.
	 * @returns The links found, in document order.
	 */
	public static computeLinks(model: ILinkComputerTarget, stateMachine: StateMachine = getStateMachine()): ILink[] {
		const classifier = getClassifier();

		const result: ILink[] = [];
		for (let i = 1, lineCount = model.getLineCount(); i <= lineCount; i++) {
			const line = model.getLineContent(i);
			const len = line.length;

			let j = 0;
			let linkBeginIndex = 0;
			// Character that caused the last reset, i.e. the one right before the current candidate.
			let linkBeginChCode = 0;
			let state = State.Start;
			let hasOpenParens = false;
			let hasOpenSquareBracket = false;
			let inSquareBrackets = false;
			let hasOpenCurlyBracket = false;

			while (j < len) {

				let resetStateMachine = false;
				const chCode = line.charCodeAt(j);

				if (state === State.Accept) {
					let chClass: CharacterClass;
					switch (chCode) {
						case CharCode.OpenParen:
							hasOpenParens = true;
							chClass = CharacterClass.None;
							break;
						case CharCode.CloseParen:
							chClass = (hasOpenParens ? CharacterClass.None : CharacterClass.ForceTermination);
							break;
						case CharCode.OpenSquareBracket:
							inSquareBrackets = true;
							hasOpenSquareBracket = true;
							chClass = CharacterClass.None;
							break;
						case CharCode.CloseSquareBracket:
							inSquareBrackets = false;
							chClass = (hasOpenSquareBracket ? CharacterClass.None : CharacterClass.ForceTermination);
							break;
						case CharCode.OpenCurlyBrace:
							hasOpenCurlyBracket = true;
							chClass = CharacterClass.None;
							break;
						case CharCode.CloseCurlyBrace:
							chClass = (hasOpenCurlyBracket ? CharacterClass.None : CharacterClass.ForceTermination);
							break;

						// The following three rules make it that ' or " or ` are allowed inside links
						// only if the link is wrapped by some other quote character
						case CharCode.SingleQuote:
						case CharCode.DoubleQuote:
						case CharCode.BackTick:
							if (linkBeginChCode === chCode) {
								chClass = CharacterClass.ForceTermination;
							} else if (linkBeginChCode === CharCode.SingleQuote || linkBeginChCode === CharCode.DoubleQuote || linkBeginChCode === CharCode.BackTick) {
								chClass = CharacterClass.None;
							} else {
								chClass = CharacterClass.ForceTermination;
							}
							break;
						case CharCode.Asterisk:
							// `*` terminates a link if the link began with `*`
							chClass = (linkBeginChCode === CharCode.Asterisk) ? CharacterClass.ForceTermination : CharacterClass.None;
							break;
						case CharCode.Space:
							// ` ` allow space in between [ and ]
							chClass = (inSquareBrackets ? CharacterClass.None : CharacterClass.ForceTermination);
							break;
						default:
							chClass = classifier.get(chCode);
					}

					// Check if character terminates link
					if (chClass === CharacterClass.ForceTermination) {
						result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, j));
						resetStateMachine = true;
					}
				} else if (state === State.End) {

					let chClass: CharacterClass;
					if (chCode === CharCode.OpenSquareBracket) {
						// Allow for the authority part to contain ipv6 addresses which contain [ and ]
						hasOpenSquareBracket = true;
						chClass = CharacterClass.None;
					} else {
						chClass = classifier.get(chCode);
					}

					// Check if character terminates link
					if (chClass === CharacterClass.ForceTermination) {
						resetStateMachine = true;
					} else {
						state = State.Accept;
					}
				} else {
					state = stateMachine.nextState(state, chCode);
					if (state === State.Invalid) {
						resetStateMachine = true;
					}
				}

				if (resetStateMachine) {
					state = State.Start;
					hasOpenParens = false;
					hasOpenSquareBracket = false;
					// Must be cleared as well: otherwise a link that ended with an unclosed `[`
					// would let the next link on this line swallow spaces.
					inSquareBrackets = false;
					hasOpenCurlyBracket = false;

					// Record where the link started
					linkBeginIndex = j + 1;
					linkBeginChCode = chCode;
				}

				j++;
			}

			// A link that runs up to the end of the line is terminated by the line end.
			if (state === State.Accept) {
				result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, len));
			}

		}

		return result;
	}
}

/**
 * Returns an array of all links contained in the provided
 * document. *Note* that this operation is computationally
 * expensive and should not run in the UI thread.
 *
 * Returns an empty array when `model` is missing or does not
 * expose `getLineCount` and `getLineContent` as functions.
 */
export function computeLinks(model: ILinkComputerTarget | null): ILink[] {
	if (!model || typeof model.getLineCount !== 'function' || typeof model.getLineContent !== 'function') {
		// Unknown caller!
		return [];
	}
	return LinkComputer.computeLinks(model);
}