CoCalc -- textToHtmlTokenizer.ts

GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/editor/common/languages/textToHtmlTokenizer.ts
5252 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { CharCode } from '../../../base/common/charCode.js';
7
import * as strings from '../../../base/common/strings.js';
8
import { IViewLineTokens, LineTokens } from '../tokens/lineTokens.js';
9
import { ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from '../languages.js';
10
import { LanguageId } from '../encodedTokenAttributes.js';
11
import { NullState, nullTokenizeEncoded } from './nullTokenize.js';
12
import { ILanguageService } from './language.js';
13

14
/**
 * An `ITokenizationSupport` with its `tokenize` member omitted.
 *
 * The code in this file only calls `getInitialState` and `tokenizeEncoded` on values of this type.
 */
export type IReducedTokenizationSupport = Omit<ITokenizationSupport, 'tokenize'>;
15

16
/**
 * Tokenization support used when no language-specific support is available.
 *
 * It starts from `NullState` and delegates every line to `nullTokenizeEncoded` with
 * `LanguageId.Null`; the line content and the EOL flag are ignored.
 */
const fallback: IReducedTokenizationSupport = {
	getInitialState: () => NullState,
	tokenizeEncoded: (buffer: string, hasEOL: boolean, state: IState) => nullTokenizeEncoded(LanguageId.Null, state)
};
20

21
/**
 * Synchronously renders `text` as tokenized HTML.
 *
 * Uses whatever tokenization support `TokenizationRegistry.get` returns for `languageId`,
 * or the module-level `fallback` support when that lookup yields nothing.
 *
 * @param languageService Supplies the `languageIdCodec` handed to the renderer.
 * @param text The source text to render.
 * @param languageId The language whose tokenization support should be used.
 * @returns The HTML produced by `_tokenizeToString`.
 */
export function tokenizeToStringSync(languageService: ILanguageService, text: string, languageId: string): string {
	const codec = languageService.languageIdCodec;
	const support = TokenizationRegistry.get(languageId) || fallback;
	return _tokenizeToString(text, codec, support);
}
24

25
/**
 * Asynchronously renders `text` as tokenized HTML.
 *
 * When `languageId` is falsy the module-level `fallback` support is used directly and the
 * registry is not consulted. Otherwise the result of `TokenizationRegistry.getOrCreate` is
 * awaited, and `fallback` is used if it resolves to nothing.
 *
 * @param languageService Supplies the `languageIdCodec` handed to the renderer.
 * @param text The source text to render.
 * @param languageId The language to tokenize with, or `null` for none.
 * @returns A promise of the HTML produced by `_tokenizeToString`.
 */
export async function tokenizeToString(languageService: ILanguageService, text: string, languageId: string | null): Promise<string> {
	const registered = languageId ? await TokenizationRegistry.getOrCreate(languageId) : null;
	return _tokenizeToString(text, languageService.languageIdCodec, registered || fallback);
}
32

33
/**
 * Renders the `[startOffset, endOffset)` slice of one line as a `<div>` containing one
 * inline-styled `<span>` per token.
 *
 * Characters before `startOffset` are not emitted, but they still advance the running column
 * width so that tabs inside the slice expand relative to the whole line.
 *
 * @param text The full line content.
 * @param viewLineTokens The line's tokens; supplies the token count, end offsets and inline styles.
 * @param colorMap Color map forwarded to `getInlineStyle`.
 * @param startOffset Offset of the first character to emit.
 * @param endOffset Offset one past the last character to emit.
 * @param tabSize Tab stop width used when expanding tabs into spaces.
 * @param useNbsp When `true`, a space at the start of the output or directly after a plain space
 *                is written as `&#160;`, so runs of spaces alternate `&#160;` and ` `
 *                (presumably so HTML whitespace collapsing cannot swallow them).
 * @returns The HTML for the requested slice.
 */
export function tokenizeLineToHTML(text: string, viewLineTokens: IViewLineTokens, colorMap: string[], startOffset: number, endOffset: number, tabSize: number, useNbsp: boolean): string {
	let result = `<div>`;
	let charIndex = 0;
	let width = 0;

	// Starts as `true` so that a leading space becomes `&#160;` when `useNbsp` is set.
	let prevIsSpace = true;

	// Produces the next space character and flips the nbsp/plain-space alternation.
	const nextSpace = (): string => {
		if (useNbsp && prevIsSpace) {
			prevIsSpace = false;
			return '&#160;';
		}
		prevIsSpace = true;
		return ' ';
	};

	for (let tokenIndex = 0, tokenCount = viewLineTokens.getCount(); tokenIndex < tokenCount; tokenIndex++) {
		const tokenEndIndex = viewLineTokens.getEndOffset(tokenIndex);
		let partContent = '';

		for (; charIndex < tokenEndIndex && charIndex < endOffset; charIndex++) {
			const charCode = text.charCodeAt(charIndex);
			const isTab = charCode === CharCode.Tab;

			// Tabs contribute their width below, once the distance to the next tab stop is known.
			width += strings.isFullWidthCharacter(charCode) ? 2 : (isTab ? 0 : 1);

			if (charIndex < startOffset) {
				// Outside the requested slice: only keep the column width up to date.
				if (isTab) {
					const remainder = width % tabSize;
					width += remainder === 0 ? tabSize : tabSize - remainder;
				}
				continue;
			}

			switch (charCode) {
				case CharCode.Tab: {
					const remainder = width % tabSize;
					const insertSpacesCount = remainder === 0 ? tabSize : tabSize - remainder;
					width += insertSpacesCount;
					let spacesRemaining = insertSpacesCount;
					while (spacesRemaining > 0) {
						partContent += nextSpace();
						spacesRemaining--;
					}
					break;
				}
				case CharCode.LessThan:
					partContent += '&lt;';
					prevIsSpace = false;
					break;

				case CharCode.GreaterThan:
					partContent += '&gt;';
					prevIsSpace = false;
					break;

				case CharCode.Ampersand:
					partContent += '&amp;';
					prevIsSpace = false;
					break;

				case CharCode.Null:
					partContent += '&#00;';
					prevIsSpace = false;
					break;

				case CharCode.UTF8_BOM:
				case CharCode.LINE_SEPARATOR:
				case CharCode.PARAGRAPH_SEPARATOR:
				case CharCode.NEXT_LINE:
					partContent += '\ufffd';
					prevIsSpace = false;
					break;

				case CharCode.CarriageReturn:
					// zero width space, because carriage return would introduce a line break.
					// The terminating `;` is required: without it a following digit would be
					// parsed as part of the numeric character reference.
					partContent += '&#8203;';
					prevIsSpace = false;
					break;

				case CharCode.Space:
					partContent += nextSpace();
					break;

				default:
					partContent += String.fromCharCode(charCode);
					prevIsSpace = false;
			}
		}

		if (tokenEndIndex <= startOffset) {
			// The token lies entirely before the slice; emit nothing for it.
			continue;
		}

		result += `<span style="${viewLineTokens.getInlineStyle(tokenIndex, colorMap)}">${partContent}</span>`;

		if (tokenEndIndex > endOffset || charIndex >= endOffset || startOffset >= endOffset) {
			break;
		}
	}

	result += `</div>`;
	return result;
}
140

141
/**
 * Tokenizes `text` line by line and renders it as HTML.
 *
 * The output is a `<div class="monaco-tokenized-source">` in which every token becomes a
 * `<span>` whose class is the token's class name and whose content is the escaped token text.
 * Lines are separated by `<br/>`, and the tokenizer state is threaded from each line to the next.
 *
 * @param text The source text to render.
 * @param languageIdCodec Codec passed to each `LineTokens` instance.
 * @param tokenizationSupport Provides the initial state and the encoded tokens for each line.
 * @returns The generated HTML string.
 */
export function _tokenizeToString(text: string, languageIdCodec: ILanguageIdCodec, tokenizationSupport: IReducedTokenizationSupport): string {
	const htmlParts: string[] = [`<div class="monaco-tokenized-source">`];
	const sourceLines = strings.splitLines(text);
	let state = tokenizationSupport.getInitialState();
	let isFirstLine = true;

	for (const line of sourceLines) {
		// Every line after the first is preceded by a line break.
		if (isFirstLine) {
			isFirstLine = false;
		} else {
			htmlParts.push(`<br/>`);
		}

		const lineResult = tokenizationSupport.tokenizeEncoded(line, true, state);
		LineTokens.convertToEndOffset(lineResult.tokens, line.length);
		const inflated = new LineTokens(lineResult.tokens, line, languageIdCodec).inflate();

		const tokenCount = inflated.getCount();
		let tokenStart = 0;
		for (let tokenIndex = 0; tokenIndex < tokenCount; tokenIndex++) {
			const className = inflated.getClassName(tokenIndex);
			const tokenEnd = inflated.getEndOffset(tokenIndex);
			htmlParts.push(`<span class="${className}">${strings.escape(line.substring(tokenStart, tokenEnd))}</span>`);
			tokenStart = tokenEnd;
		}

		state = lineResult.endState;
	}

	htmlParts.push(`</div>`);
	return htmlParts.join('');
}
171

172
Product

Resources

Company