Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/languages/textToHtmlTokenizer.ts
5252 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { CharCode } from '../../../base/common/charCode.js';
7
import * as strings from '../../../base/common/strings.js';
8
import { IViewLineTokens, LineTokens } from '../tokens/lineTokens.js';
9
import { ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from '../languages.js';
10
import { LanguageId } from '../encodedTokenAttributes.js';
11
import { NullState, nullTokenizeEncoded } from './nullTokenize.js';
12
import { ILanguageService } from './language.js';
13
14
/**
 * An `ITokenizationSupport` with the `tokenize` member omitted.
 */
export type IReducedTokenizationSupport = Omit<ITokenizationSupport, 'tokenize'>;
15
16
/**
 * Tokenization support used when no language-specific support is available.
 * It starts in `NullState` and hands every line to `nullTokenizeEncoded`
 * with `LanguageId.Null`, ignoring the line text and the EOL flag.
 */
const fallback: IReducedTokenizationSupport = {
	getInitialState() {
		return NullState;
	},
	tokenizeEncoded(buffer: string, hasEOL: boolean, state: IState) {
		return nullTokenizeEncoded(LanguageId.Null, state);
	}
};
20
21
/**
 * Synchronously renders `text` as tokenized HTML.
 *
 * Looks up the tokenization support currently registered for `languageId`
 * via `TokenizationRegistry.get` and uses the null-tokenizing `fallback`
 * when that lookup yields nothing.
 *
 * @param languageService Supplies the language id codec passed to the renderer.
 * @param text The source text to render.
 * @param languageId The language whose tokenization support should be used.
 * @returns The HTML produced by `_tokenizeToString`.
 */
export function tokenizeToStringSync(languageService: ILanguageService, text: string, languageId: string): string {
	const codec = languageService.languageIdCodec;
	const support = TokenizationRegistry.get(languageId) || fallback;
	return _tokenizeToString(text, codec, support);
}
24
25
/**
 * Asynchronously renders `text` as tokenized HTML.
 *
 * When `languageId` is falsy the `fallback` support is used directly.
 * Otherwise the support is obtained by awaiting
 * `TokenizationRegistry.getOrCreate(languageId)`, and `fallback` is used
 * if that resolves to a falsy value.
 *
 * @param languageService Supplies the language id codec passed to the renderer.
 * @param text The source text to render.
 * @param languageId The language to tokenize with, or `null` for none.
 * @returns A promise for the HTML produced by `_tokenizeToString`.
 */
export async function tokenizeToString(languageService: ILanguageService, text: string, languageId: string | null): Promise<string> {
	const support = languageId
		? (await TokenizationRegistry.getOrCreate(languageId)) || fallback
		: fallback;
	return _tokenizeToString(text, languageService.languageIdCodec, support);
}
32
33
/**
 * Renders the `[startOffset, endOffset)` slice of a single line as HTML.
 *
 * The result is a `<div>` containing one `<span style="...">` per token that
 * overlaps the requested range; the style comes from
 * `viewLineTokens.getInlineStyle(tokenIndex, colorMap)`.
 *
 * Characters before `startOffset` are not emitted, but they are still walked
 * so that the running visual width is correct when a tab inside the range is
 * expanded to the next tab stop.
 *
 * @param text The full line text.
 * @param viewLineTokens Tokens of the line (end offsets and inline styles).
 * @param colorMap Color map forwarded to `getInlineStyle`.
 * @param startOffset First character offset to emit (inclusive).
 * @param endOffset Character offset at which to stop (exclusive).
 * @param tabSize Tab stop distance, in columns.
 * @param useNbsp When true, runs of spaces alternate `&#160;` and a plain
 *   space (starting with `&#160;`), so HTML does not collapse them.
 * @returns The HTML for the requested slice.
 */
export function tokenizeLineToHTML(text: string, viewLineTokens: IViewLineTokens, colorMap: string[], startOffset: number, endOffset: number, tabSize: number, useNbsp: boolean): string {
	let result = `<div>`;
	let charIndex = 0;
	// Visual width consumed so far; only used to compute tab stops.
	let width = 0;

	// Starts as true so that a leading space is emitted as `&#160;` when `useNbsp` is set.
	let prevIsSpace = true;

	for (let tokenIndex = 0, tokenCount = viewLineTokens.getCount(); tokenIndex < tokenCount; tokenIndex++) {
		const tokenEndIndex = viewLineTokens.getEndOffset(tokenIndex);
		let partContent = '';

		for (; charIndex < tokenEndIndex && charIndex < endOffset; charIndex++) {
			const charCode = text.charCodeAt(charIndex);
			const isTab = charCode === CharCode.Tab;

			// Tabs contribute their width below, once the distance to the next tab stop is known.
			width += strings.isFullWidthCharacter(charCode) ? 2 : (isTab ? 0 : 1);

			if (charIndex < startOffset) {
				// Outside the requested range: track width only, emit nothing.
				if (isTab) {
					const remainder = width % tabSize;
					width += remainder === 0 ? tabSize : tabSize - remainder;
				}
				continue;
			}

			switch (charCode) {
				case CharCode.Tab: {
					// Expand the tab into the number of spaces needed to reach the next tab stop.
					const remainder = width % tabSize;
					const insertSpacesCount = remainder === 0 ? tabSize : tabSize - remainder;
					width += insertSpacesCount;
					let spacesRemaining = insertSpacesCount;
					while (spacesRemaining > 0) {
						if (useNbsp && prevIsSpace) {
							partContent += '&#160;';
							prevIsSpace = false;
						} else {
							partContent += ' ';
							prevIsSpace = true;
						}
						spacesRemaining--;
					}
					break;
				}
				case CharCode.LessThan:
					partContent += '&lt;';
					prevIsSpace = false;
					break;

				case CharCode.GreaterThan:
					partContent += '&gt;';
					prevIsSpace = false;
					break;

				case CharCode.Ampersand:
					partContent += '&amp;';
					prevIsSpace = false;
					break;

				case CharCode.Null:
					partContent += '&#00;';
					prevIsSpace = false;
					break;

				case CharCode.UTF8_BOM:
				case CharCode.LINE_SEPARATOR:
				case CharCode.PARAGRAPH_SEPARATOR:
				case CharCode.NEXT_LINE:
					partContent += '\ufffd';
					prevIsSpace = false;
					break;

				case CharCode.CarriageReturn:
					// zero width space, because carriage return would introduce a line break.
					// The reference must be terminated with `;`: otherwise a following digit
					// (e.g. "\r1") would be parsed as part of the numeric character reference.
					partContent += '&#8203;';
					prevIsSpace = false;
					break;

				case CharCode.Space:
					if (useNbsp && prevIsSpace) {
						partContent += '&#160;';
						prevIsSpace = false;
					} else {
						partContent += ' ';
						prevIsSpace = true;
					}
					break;

				default:
					partContent += String.fromCharCode(charCode);
					prevIsSpace = false;
			}
		}

		if (tokenEndIndex <= startOffset) {
			// Token lies entirely before the requested range.
			continue;
		}

		result += `<span style="${viewLineTokens.getInlineStyle(tokenIndex, colorMap)}">${partContent}</span>`;

		if (tokenEndIndex > endOffset || charIndex >= endOffset || startOffset >= endOffset) {
			// The requested range has been fully emitted.
			break;
		}
	}

	result += `</div>`;
	return result;
}
140
141
/**
 * Tokenizes `text` line by line and renders it as HTML.
 *
 * The output is a `<div class="monaco-tokenized-source">` in which every
 * token becomes a `<span>` whose class is the token's class name and whose
 * content is the escaped token text. Lines are separated by `<br/>`.
 * The end state of each line is used as the start state of the next one.
 *
 * @param text The source text; it is split with `strings.splitLines`.
 * @param languageIdCodec Codec handed to each `LineTokens` instance.
 * @param tokenizationSupport Provides the initial state and `tokenizeEncoded`.
 * @returns The rendered HTML string.
 */
export function _tokenizeToString(text: string, languageIdCodec: ILanguageIdCodec, tokenizationSupport: IReducedTokenizationSupport): string {
	const sourceLines = strings.splitLines(text);
	let state = tokenizationSupport.getInitialState();
	const renderedLines: string[] = [];

	for (const sourceLine of sourceLines) {
		const lineResult = tokenizationSupport.tokenizeEncoded(sourceLine, true, state);
		LineTokens.convertToEndOffset(lineResult.tokens, sourceLine.length);
		const inflated = new LineTokens(lineResult.tokens, sourceLine, languageIdCodec).inflate();

		let lineHtml = '';
		let from = 0;
		const tokenCount = inflated.getCount();
		for (let k = 0; k < tokenCount; k++) {
			const className = inflated.getClassName(k);
			const to = inflated.getEndOffset(k);
			lineHtml += `<span class="${className}">${strings.escape(sourceLine.substring(from, to))}</span>`;
			from = to;
		}
		renderedLines.push(lineHtml);

		// Carry the tokenizer state over to the next line.
		state = lineResult.endState;
	}

	return `<div class="monaco-tokenized-source">${renderedLines.join(`<br/>`)}</div>`;
}
171
172