Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/languages/linkComputer.ts
3294 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../base/common/charCode.js';
7
import { CharacterClassifier } from '../core/characterClassifier.js';
8
import { ILink } from '../languages.js';
9
10
/**
 * Minimal line-oriented view of a document that the link scanner reads from.
 */
export interface ILinkComputerTarget {
	/** Number of lines available; the scanner visits lines 1 through this value inclusive. */
	getLineCount(): number;

	/** Text of the line identified by `lineNumber`. */
	getLineContent(lineNumber: number): string;
}
14
15
/**
 * States of the link recognizer.
 *
 * The numeric value of a state is used as the row index of the transition
 * table built by `StateMachine`, so the values must stay small and stable.
 * The per-member notes describe the built-in `http://` / `https://` / `file://`
 * machine; a custom machine may wire the states differently.
 */
export const enum State {
	/** No transition is defined; also the fill value of the transition table. */
	Invalid = 0,
	/** Nothing of a prefix has been matched yet. */
	Start = 1,
	/** Matched `h`. */
	H = 2,
	/** Matched `ht`. */
	HT = 3,
	/** Matched `htt`. */
	HTT = 4,
	/** Matched `http`; either `s` or `:` may follow. */
	HTTP = 5,
	/** Matched `f`. */
	F = 6,
	/** Matched `fi`. */
	FI = 7,
	/** Matched `fil`. */
	FIL = 8,
	/** A complete scheme name (`https` or `file`) was matched; `:` must follow. */
	BeforeColon = 9,
	/** Matched the `:` after the scheme; `/` must follow. */
	AfterColon = 10,
	/** Matched the first `/`; one more `/` completes the prefix. */
	AlmostThere = 11,
	/** The whole `scheme://` prefix was matched. */
	End = 12,
	/** Inside the body of a link; set by the scanner rather than by a table edge. */
	Accept = 13,
	/** Marker only: custom states may use values from here on. */
	LastKnownState = 14
}
32
33
/**
 * One transition of a `StateMachine`: being in state `from` and reading the
 * UTF-16 code unit `chCode` leads to state `to`.
 */
export type Edge = [from: State, chCode: number, to: State];
34
35
/**
 * Fixed-size two-dimensional table of unsigned bytes, stored row after row in
 * one flat `Uint8Array`.
 */
class Uint8Matrix {

	private readonly _data: Uint8Array;
	public readonly rows: number;
	public readonly cols: number;

	/**
	 * @param rows Number of rows.
	 * @param cols Number of columns.
	 * @param defaultValue Value stored in every cell initially.
	 */
	constructor(rows: number, cols: number, defaultValue: number) {
		this._data = new Uint8Array(rows * cols).fill(defaultValue);
		this.rows = rows;
		this.cols = cols;
	}

	/** Reads the cell at (`row`, `col`). No bounds checking is performed. */
	public get(row: number, col: number): number {
		const offset = row * this.cols + col;
		return this._data[offset];
	}

	/** Writes `value` to the cell at (`row`, `col`). No bounds checking is performed. */
	public set(row: number, col: number, value: number): void {
		const offset = row * this.cols + col;
		this._data[offset] = value;
	}
}
60
61
/**
 * Table-driven deterministic state machine over UTF-16 code units.
 *
 * The table has one row per state and one column per character code, both
 * sized from the largest values that occur in the edges it is built from.
 */
export class StateMachine {

	private readonly _states: Uint8Matrix;
	private readonly _maxCharCode: number;

	/**
	 * @param edges Transitions to install. Every (state, character) pair not
	 * mentioned maps to `State.Invalid`; if a pair is listed more than once the
	 * last entry wins.
	 */
	constructor(edges: Edge[]) {
		// First pass: find the largest character code and state so the table can be sized.
		let highestCharCode = 0;
		let highestState: number = State.Invalid;
		for (const [from, chCode, to] of edges) {
			if (chCode > highestCharCode) {
				highestCharCode = chCode;
			}
			if (from > highestState) {
				highestState = from;
			}
			if (to > highestState) {
				highestState = to;
			}
		}

		// Sizes are one past the largest index.
		const charCodeCount = highestCharCode + 1;
		const stateCount = highestState + 1;

		// Second pass: record the transitions.
		const table = new Uint8Matrix(stateCount, charCodeCount, State.Invalid);
		for (const [from, chCode, to] of edges) {
			table.set(from, chCode, to);
		}

		this._states = table;
		this._maxCharCode = charCodeCount;
	}

	/**
	 * Looks up the state reached from `currentState` on reading `chCode`.
	 *
	 * @returns `State.Invalid` when `chCode` lies outside the table's columns,
	 * otherwise whatever the table holds for that pair.
	 */
	public nextState(currentState: State, chCode: number): State {
		const outsideTable = chCode < 0 || chCode >= this._maxCharCode;
		return outsideTable ? State.Invalid : this._states.get(currentState, chCode);
	}
}
102
103
// Shared recognizer for the prefixes http://, https:// and file://, built on first use.
let _stateMachine: StateMachine | null = null;

/**
 * Returns the shared prefix state machine, constructing it the first time it
 * is requested. Scheme letters are accepted in either case.
 */
function getStateMachine(): StateMachine {
	if (_stateMachine !== null) {
		return _stateMachine;
	}

	const edges: Edge[] = [
		// First letter selects the http(s) or the file branch.
		[State.Start, CharCode.h, State.H],
		[State.Start, CharCode.H, State.H],
		[State.Start, CharCode.f, State.F],
		[State.Start, CharCode.F, State.F],

		// h-t-t-p
		[State.H, CharCode.t, State.HT],
		[State.H, CharCode.T, State.HT],

		[State.HT, CharCode.t, State.HTT],
		[State.HT, CharCode.T, State.HTT],

		[State.HTT, CharCode.p, State.HTTP],
		[State.HTT, CharCode.P, State.HTTP],

		// After "http" either an "s" or the colon may follow.
		[State.HTTP, CharCode.s, State.BeforeColon],
		[State.HTTP, CharCode.S, State.BeforeColon],
		[State.HTTP, CharCode.Colon, State.AfterColon],

		// f-i-l-e
		[State.F, CharCode.i, State.FI],
		[State.F, CharCode.I, State.FI],

		[State.FI, CharCode.l, State.FIL],
		[State.FI, CharCode.L, State.FIL],

		[State.FIL, CharCode.e, State.BeforeColon],
		[State.FIL, CharCode.E, State.BeforeColon],

		// "://"
		[State.BeforeColon, CharCode.Colon, State.AfterColon],

		[State.AfterColon, CharCode.Slash, State.AlmostThere],

		[State.AlmostThere, CharCode.Slash, State.End],
	];

	const machine = new StateMachine(edges);
	_stateMachine = machine;
	return machine;
}
144
145
146
/**
 * How the link scanner treats a character once it is inside a link.
 */
const enum CharacterClass {
	/** No special treatment. */
	None = 0,
	/** The link ends in front of this character. */
	ForceTermination = 1,
	/** May occur inside a link, but is trimmed when it would be the link's last character. */
	CannotEndIn = 2
}
151
152
// Shared character classifier, built on first use.
let _classifier: CharacterClassifier<CharacterClass> | null = null;

/**
 * Returns the shared classifier that tells the link scanner which characters
 * end a link and which may not be a link's last character.
 */
function getClassifier(): CharacterClassifier<CharacterClass> {
	if (_classifier !== null) {
		return _classifier;
	}

	const classifier = new CharacterClassifier<CharacterClass>(CharacterClass.None);
	_classifier = classifier;

	// Assigns `characterClass` to every UTF-16 code unit of `characters`.
	const assign = (characters: string, characterClass: CharacterClass): void => {
		for (let idx = 0; idx < characters.length; idx++) {
			classifier.set(characters.charCodeAt(idx), characterClass);
		}
	};

	// allow-any-unicode-next-line
	const FORCE_TERMINATION_CHARACTERS = ' \t<>\'\"、。。、,.:;‘〈「『〔([{「」}])〕』」〉’`~…|';
	assign(FORCE_TERMINATION_CHARACTERS, CharacterClass.ForceTermination);

	// Applied second on purpose: later assignments replace earlier ones for the same character.
	const CANNOT_END_WITH_CHARACTERS = '.,;:';
	assign(CANNOT_END_WITH_CHARACTERS, CharacterClass.CannotEndIn);

	return classifier;
}
170
171
/**
 * Finds links in the lines of a document by running a prefix state machine
 * over each line and then extending the match until a terminating character.
 */
export class LinkComputer {

	/**
	 * Builds the `ILink` for a raw match, trimming characters a link should not end with.
	 *
	 * @param classifier Character classes; only `CannotEndIn` is consulted here.
	 * @param line Content of the line that contains the match.
	 * @param lineNumber Line number reported in the resulting range.
	 * @param linkBeginIndex 0-based index of the first character of the match.
	 * @param linkEndIndex 0-based index just past the last character of the match.
	 * @returns The link, with 1-based columns (end column exclusive) and the trimmed text as `url`.
	 */
	private static _createLink(classifier: CharacterClassifier<CharacterClass>, line: string, lineNumber: number, linkBeginIndex: number, linkEndIndex: number): ILink {
		// Do not allow to end link in certain characters...
		let lastIncludedCharIndex = linkEndIndex - 1;
		do {
			const chCode = line.charCodeAt(lastIncludedCharIndex);
			const chClass = classifier.get(chCode);
			if (chClass !== CharacterClass.CannotEndIn) {
				break;
			}
			lastIncludedCharIndex--;
		} while (lastIncludedCharIndex > linkBeginIndex);

		// Handle links enclosed in parens, square brackets and curlys.
		if (linkBeginIndex > 0) {
			const charCodeBeforeLink = line.charCodeAt(linkBeginIndex - 1);
			const lastCharCodeInLink = line.charCodeAt(lastIncludedCharIndex);

			if (
				(charCodeBeforeLink === CharCode.OpenParen && lastCharCodeInLink === CharCode.CloseParen)
				|| (charCodeBeforeLink === CharCode.OpenSquareBracket && lastCharCodeInLink === CharCode.CloseSquareBracket)
				|| (charCodeBeforeLink === CharCode.OpenCurlyBrace && lastCharCodeInLink === CharCode.CloseCurlyBrace)
			) {
				// Do not end in ) if ( is before the link start
				// Do not end in ] if [ is before the link start
				// Do not end in } if { is before the link start
				lastIncludedCharIndex--;
			}
		}

		return {
			range: {
				startLineNumber: lineNumber,
				startColumn: linkBeginIndex + 1,
				endLineNumber: lineNumber,
				endColumn: lastIncludedCharIndex + 2
			},
			url: line.substring(linkBeginIndex, lastIncludedCharIndex + 1)
		};
	}

	/**
	 * Finds every link in `model`, visiting lines 1 through `model.getLineCount()`.
	 *
	 * Each line is scanned once from left to right. `stateMachine` recognizes the
	 * link prefix; once it reaches `State.End` and the next character does not
	 * terminate, the scanner switches to `State.Accept` and the link runs until a
	 * terminating character or the end of the line. Links never span lines.
	 *
	 * @param model Line source to scan.
	 * @param stateMachine Prefix recognizer; defaults to the shared `http://` / `https://` / `file://` machine.
	 * @returns The links in the order they were found.
	 */
	public static computeLinks(model: ILinkComputerTarget, stateMachine: StateMachine = getStateMachine()): ILink[] {
		const classifier = getClassifier();

		const result: ILink[] = [];
		for (let i = 1, lineCount = model.getLineCount(); i <= lineCount; i++) {
			const line = model.getLineContent(i);
			const len = line.length;

			let j = 0;
			// Index where the current candidate starts, and the character just before it.
			let linkBeginIndex = 0;
			let linkBeginChCode = 0;
			let state = State.Start;
			// Bracket bookkeeping for the current candidate only.
			let hasOpenParens = false;
			let hasOpenSquareBracket = false;
			let inSquareBrackets = false;
			let hasOpenCurlyBracket = false;

			while (j < len) {

				let resetStateMachine = false;
				const chCode = line.charCodeAt(j);

				if (state === State.Accept) {
					let chClass: CharacterClass;
					switch (chCode) {
						case CharCode.OpenParen:
							hasOpenParens = true;
							chClass = CharacterClass.None;
							break;
						case CharCode.CloseParen:
							chClass = (hasOpenParens ? CharacterClass.None : CharacterClass.ForceTermination);
							break;
						case CharCode.OpenSquareBracket:
							inSquareBrackets = true;
							hasOpenSquareBracket = true;
							chClass = CharacterClass.None;
							break;
						case CharCode.CloseSquareBracket:
							inSquareBrackets = false;
							chClass = (hasOpenSquareBracket ? CharacterClass.None : CharacterClass.ForceTermination);
							break;
						case CharCode.OpenCurlyBrace:
							hasOpenCurlyBracket = true;
							chClass = CharacterClass.None;
							break;
						case CharCode.CloseCurlyBrace:
							chClass = (hasOpenCurlyBracket ? CharacterClass.None : CharacterClass.ForceTermination);
							break;

						// The following three rules make it that ' or " or ` are allowed inside links
						// only if the link is wrapped by some other quote character
						case CharCode.SingleQuote:
						case CharCode.DoubleQuote:
						case CharCode.BackTick:
							if (linkBeginChCode === chCode) {
								chClass = CharacterClass.ForceTermination;
							} else if (linkBeginChCode === CharCode.SingleQuote || linkBeginChCode === CharCode.DoubleQuote || linkBeginChCode === CharCode.BackTick) {
								chClass = CharacterClass.None;
							} else {
								chClass = CharacterClass.ForceTermination;
							}
							break;
						case CharCode.Asterisk:
							// `*` terminates a link if the link began with `*`
							chClass = (linkBeginChCode === CharCode.Asterisk) ? CharacterClass.ForceTermination : CharacterClass.None;
							break;
						case CharCode.Space:
							// ` ` allow space in between [ and ]
							chClass = (inSquareBrackets ? CharacterClass.None : CharacterClass.ForceTermination);
							break;
						default:
							chClass = classifier.get(chCode);
					}

					// Check if character terminates link
					if (chClass === CharacterClass.ForceTermination) {
						result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, j));
						resetStateMachine = true;
					}
				} else if (state === State.End) {

					let chClass: CharacterClass;
					if (chCode === CharCode.OpenSquareBracket) {
						// Allow for the authority part to contain ipv6 addresses which contain [ and ]
						hasOpenSquareBracket = true;
						chClass = CharacterClass.None;
					} else {
						chClass = classifier.get(chCode);
					}

					// Check if character terminates link
					if (chClass === CharacterClass.ForceTermination) {
						resetStateMachine = true;
					} else {
						state = State.Accept;
					}
				} else {
					state = stateMachine.nextState(state, chCode);
					if (state === State.Invalid) {
						resetStateMachine = true;
					}
				}

				if (resetStateMachine) {
					state = State.Start;
					hasOpenParens = false;
					hasOpenSquareBracket = false;
					// Must be cleared together with the other bracket flags: a link that
					// contained `[` and was terminated before a matching `]` would otherwise
					// stop later links on this line from ending at a space.
					inSquareBrackets = false;
					hasOpenCurlyBracket = false;

					// Record where the link started
					linkBeginIndex = j + 1;
					linkBeginChCode = chCode;
				}

				j++;
			}

			// A link that runs up to the end of the line has no terminating character.
			if (state === State.Accept) {
				result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, len));
			}

		}

		return result;
	}
}
339
340
/**
 * Returns an array of all links contained in the provided
 * document. *Note* that this operation is computationally
 * expensive and should not run in the UI thread.
 *
 * An empty array is returned when `model` is missing or does not offer both
 * `getLineCount` and `getLineContent` as functions.
 */
export function computeLinks(model: ILinkComputerTarget | null): ILink[] {
	if (model && typeof model.getLineCount === 'function' && typeof model.getLineContent === 'function') {
		return LinkComputer.computeLinks(model);
	}

	// Unknown caller!
	return [];
}
352
353