Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/languages/textToHtmlTokenizer.ts
3294 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../base/common/charCode.js';
7
import * as strings from '../../../base/common/strings.js';
8
import { IViewLineTokens, LineTokens } from '../tokens/lineTokens.js';
9
import { ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from '../languages.js';
10
import { LanguageId } from '../encodedTokenAttributes.js';
11
import { NullState, nullTokenizeEncoded } from './nullTokenize.js';
12
import { ILanguageService } from './language.js';
13
14
/**
 * An `ITokenizationSupport` with its `tokenize` member omitted.
 */
export type IReducedTokenizationSupport = Omit<ITokenizationSupport, 'tokenize'>;
15
16
/**
 * Tokenization support used when no language-specific support is available:
 * it starts in `NullState` and delegates every line to `nullTokenizeEncoded`
 * with `LanguageId.Null`.
 */
const fallback: IReducedTokenizationSupport = {
	getInitialState() {
		return NullState;
	},
	// The line text and the EOL flag are not used; only the state is forwarded.
	tokenizeEncoded(_buffer: string, _hasEOL: boolean, state: IState) {
		return nullTokenizeEncoded(LanguageId.Null, state);
	}
};
20
21
/**
 * Synchronously renders `text` as tokenized HTML.
 *
 * Uses whatever `TokenizationRegistry.get(languageId)` returns and switches to
 * the module-level `fallback` support when that value is falsy.
 *
 * @param languageService Provides the `languageIdCodec` handed to the renderer.
 * @param text The source text to render.
 * @param languageId Identifier looked up in the `TokenizationRegistry`.
 * @returns The HTML string produced by `_tokenizeToString`.
 */
export function tokenizeToStringSync(languageService: ILanguageService, text: string, languageId: string): string {
	const codec = languageService.languageIdCodec;
	const registered = TokenizationRegistry.get(languageId);
	const support = registered || fallback;
	return _tokenizeToString(text, codec, support);
}
24
25
/**
 * Asynchronously renders `text` as tokenized HTML.
 *
 * When `languageId` is truthy, the result of awaiting
 * `TokenizationRegistry.getOrCreate(languageId)` is used; if `languageId` is
 * falsy or the awaited value is falsy, the module-level `fallback` support is
 * used instead.
 *
 * @param languageService Provides the `languageIdCodec` handed to the renderer.
 * @param text The source text to render.
 * @param languageId Identifier passed to the `TokenizationRegistry`, or `null`.
 * @returns A promise for the HTML string produced by `_tokenizeToString`.
 */
export async function tokenizeToString(languageService: ILanguageService, text: string, languageId: string | null): Promise<string> {
	let support: IReducedTokenizationSupport = fallback;
	if (languageId) {
		const created = await TokenizationRegistry.getOrCreate(languageId);
		support = created || fallback;
	}
	return _tokenizeToString(text, languageService.languageIdCodec, support);
}
32
33
/**
 * Renders the `[startOffset, endOffset)` range of `text` as an HTML string of
 * the form `<div><span style="...">...</span>...</div>`, emitting one `<span>`
 * per token that overlaps the range.
 *
 * Character handling:
 * - tabs are expanded to spaces up to the next multiple of `tabSize`;
 * - `<`, `>` and `&` are written as `&lt;`, `&gt;` and `&amp;`;
 * - NUL is written as `&#00;`;
 * - BOM, LINE SEPARATOR, PARAGRAPH SEPARATOR and NEXT LINE are written as U+FFFD;
 * - carriage return is written as a zero width space (`&#8203;`);
 * - when `useNbsp` is set, a space that follows another space (or starts the
 *   output) is written as `&#160;`, so spaces and `&#160;` alternate.
 *
 * @param text The content of the line.
 * @param viewLineTokens Token boundaries (`getEndOffset`) and styles (`getInlineStyle`) for `text`.
 * @param colorMap Forwarded unchanged to `viewLineTokens.getInlineStyle`.
 * @param startOffset Index of the first character to render (inclusive).
 * @param endOffset Index at which rendering stops (exclusive).
 * @param tabSize Tab stop width used when expanding tab characters.
 * @param useNbsp Whether to alternate spaces with `&#160;`.
 * @returns The HTML for the requested range.
 */
export function tokenizeLineToHTML(text: string, viewLineTokens: IViewLineTokens, colorMap: string[], startOffset: number, endOffset: number, tabSize: number, useNbsp: boolean): string {
	let result = `<div>`;
	let charIndex = startOffset;
	// Extra columns introduced so far by expanding tabs, so that later tabs
	// align to the correct tab stop.
	let tabsCharDelta = 0;

	// Starts out `true` so that a leading space is rendered as `&#160;` when `useNbsp` is set.
	let prevIsSpace = true;

	for (let tokenIndex = 0, tokenCount = viewLineTokens.getCount(); tokenIndex < tokenCount; tokenIndex++) {
		const tokenEndIndex = viewLineTokens.getEndOffset(tokenIndex);

		if (tokenEndIndex <= startOffset) {
			// The token ends before the requested range begins.
			continue;
		}

		let partContent = '';

		for (; charIndex < tokenEndIndex && charIndex < endOffset; charIndex++) {
			const charCode = text.charCodeAt(charIndex);

			switch (charCode) {
				case CharCode.Tab: {
					let insertSpacesCount = tabSize - (charIndex + tabsCharDelta) % tabSize;
					tabsCharDelta += insertSpacesCount - 1;
					while (insertSpacesCount > 0) {
						if (useNbsp && prevIsSpace) {
							partContent += '&#160;';
							prevIsSpace = false;
						} else {
							partContent += ' ';
							prevIsSpace = true;
						}
						insertSpacesCount--;
					}
					break;
				}
				case CharCode.LessThan:
					partContent += '&lt;';
					prevIsSpace = false;
					break;

				case CharCode.GreaterThan:
					partContent += '&gt;';
					prevIsSpace = false;
					break;

				case CharCode.Ampersand:
					partContent += '&amp;';
					prevIsSpace = false;
					break;

				case CharCode.Null:
					partContent += '&#00;';
					prevIsSpace = false;
					break;

				case CharCode.UTF8_BOM:
				case CharCode.LINE_SEPARATOR:
				case CharCode.PARAGRAPH_SEPARATOR:
				case CharCode.NEXT_LINE:
					partContent += '\ufffd';
					prevIsSpace = false;
					break;

				case CharCode.CarriageReturn:
					// zero width space, because carriage return would introduce a line break.
					// The terminating `;` is required: without it, a following decimal digit
					// would be parsed as part of the numeric character reference.
					partContent += '&#8203;';
					prevIsSpace = false;
					break;

				case CharCode.Space:
					if (useNbsp && prevIsSpace) {
						partContent += '&#160;';
						prevIsSpace = false;
					} else {
						partContent += ' ';
						prevIsSpace = true;
					}
					break;

				default:
					partContent += String.fromCharCode(charCode);
					prevIsSpace = false;
			}
		}

		result += `<span style="${viewLineTokens.getInlineStyle(tokenIndex, colorMap)}">${partContent}</span>`;

		if (tokenEndIndex > endOffset || charIndex >= endOffset) {
			// The requested range has been fully rendered.
			break;
		}
	}

	result += `</div>`;
	return result;
}
128
129
/**
 * Tokenizes `text` line by line and renders it as
 * `<div class="monaco-tokenized-source">...</div>`.
 *
 * Each token becomes `<span class="CLASS">ESCAPED TEXT</span>`, where `CLASS`
 * is the token's class name and the text is passed through `strings.escape`.
 * Consecutive lines are separated by `<br/>`. The end state returned for one
 * line is used as the start state of the next.
 *
 * @param text The source text; it is split with `strings.splitLines`.
 * @param languageIdCodec Passed to the `LineTokens` created for every line.
 * @param tokenizationSupport Supplies the initial state and `tokenizeEncoded`.
 * @returns The generated HTML string.
 */
export function _tokenizeToString(text: string, languageIdCodec: ILanguageIdCodec, tokenizationSupport: IReducedTokenizationSupport): string {
	const html: string[] = [`<div class="monaco-tokenized-source">`];
	let state = tokenizationSupport.getInitialState();

	strings.splitLines(text).forEach((lineText, lineIndex) => {
		if (lineIndex !== 0) {
			html.push(`<br/>`);
		}

		const outcome = tokenizationSupport.tokenizeEncoded(lineText, true, state);
		LineTokens.convertToEndOffset(outcome.tokens, lineText.length);
		const inflated = new LineTokens(outcome.tokens, lineText, languageIdCodec).inflate();

		// Walk the tokens left to right; each one covers [from, to).
		let from = 0;
		const tokenCount = inflated.getCount();
		for (let k = 0; k < tokenCount; k++) {
			const className = inflated.getClassName(k);
			const to = inflated.getEndOffset(k);
			html.push(`<span class="${className}">${strings.escape(lineText.substring(from, to))}</span>`);
			from = to;
		}

		state = outcome.endState;
	});

	html.push(`</div>`);
	return html.join('');
}
159
160