Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Disposable } from '../../../../base/common/lifecycle.js';
7
import { ILanguageDetectionService, ILanguageDetectionStats, LanguageDetectionStatsClassification, LanguageDetectionStatsId } from '../common/languageDetectionWorkerService.js';
8
import { AppResourcePath, FileAccess, nodeModulesAsarPath, nodeModulesPath, Schemas } from '../../../../base/common/network.js';
9
import { IWorkbenchEnvironmentService } from '../../environment/common/environmentService.js';
10
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
11
import { ILanguageService } from '../../../../editor/common/languages/language.js';
12
import { URI } from '../../../../base/common/uri.js';
13
import { isWeb } from '../../../../base/common/platform.js';
14
import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
15
import { IModelService } from '../../../../editor/common/services/model.js';
16
import { IWebWorkerClient } from '../../../../base/common/worker/webWorker.js';
17
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry.js';
18
import { IDiagnosticsService } from '../../../../platform/diagnostics/common/diagnostics.js';
19
import { IWorkspaceContextService } from '../../../../platform/workspace/common/workspace.js';
20
import { IEditorService } from '../../editor/common/editorService.js';
21
import { IStorageService, StorageScope, StorageTarget } from '../../../../platform/storage/common/storage.js';
22
import { LRUCache } from '../../../../base/common/map.js';
23
import { ILogService } from '../../../../platform/log/common/log.js';
24
import { canASAR } from '../../../../amdX.js';
25
import { createWebWorker } from '../../../../base/browser/webWorkerFactory.js';
26
import { WorkerTextModelSyncClient } from '../../../../editor/common/services/textModelSync/textModelSync.impl.js';
27
import { ILanguageDetectionWorker, LanguageDetectionWorkerHost } from './languageDetectionWorker.protocol.js';
28
29
// Cap shared by the language-history caches and by the workspace extension scan below.
const TOP_LANG_COUNTS = 12;

// Locations of the `@vscode/vscode-languagedetection` package (plain node_modules and ASAR variants).
const moduleLocation: AppResourcePath = `${nodeModulesPath}/@vscode/vscode-languagedetection`;
const moduleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/@vscode/vscode-languagedetection`;

// Locations of the `vscode-regexp-languagedetection` package (plain node_modules and ASAR variants).
const regexpModuleLocation: AppResourcePath = `${nodeModulesPath}/vscode-regexp-languagedetection`;
const regexpModuleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/vscode-regexp-languagedetection`;
35
36
/**
 * Workbench implementation of {@link ILanguageDetectionService}.
 *
 * Detection itself is delegated to a {@link LanguageDetectionWorkerClient}. When
 * history-based detection is enabled, the request is accompanied by per-language
 * bias weights derived from four sources: languages opened in this session,
 * languages resolved from the workspace's file extensions, and the workspace- and
 * profile-scoped histories of opened languages kept in storage.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	static readonly enablementSettingKey = 'workbench.editor.languageDetection';
	static readonly historyBasedEnablementConfig = 'workbench.editor.historyBasedLanguageDetection';
	static readonly preferHistoryConfig = 'workbench.editor.preferHistoryBasedLanguageDetection';
	static readonly workspaceOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.workspace';
	static readonly globalOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.global';

	_serviceBrand: undefined;

	private _languageDetectionWorkerClient: LanguageDetectionWorkerClient;

	/** Set as soon as workspace resolution starts, so it is attempted at most once. */
	private hasResolvedWorkspaceLanguageIds = false;
	/** Language ids derived from the file extensions found in the workspace. */
	private workspaceLanguageIds = new Set<string>();
	/** Language ids of editors that became active during this session. */
	private sessionOpenedLanguageIds = new Set<string>();
	/** Opened-language history persisted under the profile-scoped storage key. */
	private historicalGlobalOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	/** Opened-language history persisted under the workspace-scoped storage key. */
	private historicalWorkspaceOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	/** True whenever `langBiases` must be recomputed before its next use. */
	private dirtyBiases: boolean = true;
	/** Cached result of the last bias computation. */
	private langBiases: Record<string, number> = {};

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@ILanguageService languageService: ILanguageService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IDiagnosticsService private readonly _diagnosticsService: IDiagnosticsService,
		@IWorkspaceContextService private readonly _workspaceContextService: IWorkspaceContextService,
		@IModelService modelService: IModelService,
		@IEditorService private readonly _editorService: IEditorService,
		@ITelemetryService telemetryService: ITelemetryService,
		@IStorageService storageService: IStorageService,
		@ILogService private readonly _logService: ILogService
	) {
		super();

		// Pick the ASAR or plain node_modules location once, then derive every asset URI from it.
		const useAsar = canASAR && this._environmentService.isBuilt && !isWeb;
		const detectionModule = useAsar ? moduleLocationAsar : moduleLocation;
		const regexpModule = useAsar ? regexpModuleLocationAsar : regexpModuleLocation;

		this._languageDetectionWorkerClient = this._register(new LanguageDetectionWorkerClient(
			modelService,
			languageService,
			telemetryService,
			// TODO See if it's possible to bundle vscode-languagedetection
			FileAccess.asBrowserUri(`${detectionModule}/dist/lib/index.js`).toString(true),
			FileAccess.asBrowserUri(`${detectionModule}/model/model.json`).toString(true),
			FileAccess.asBrowserUri(`${detectionModule}/model/group1-shard1of1.bin`).toString(true),
			FileAccess.asBrowserUri(`${regexpModule}/dist/index.js`).toString(true),
		));

		this.initEditorOpenedListeners(storageService);
	}

	/**
	 * Populates `workspaceLanguageIds` from the workspace's file extensions.
	 *
	 * The resolved flag is set before awaiting, so later calls (including ones made
	 * while the first is still pending) return immediately. At most
	 * `TOP_LANG_COUNTS` extensions that map to a language are taken into account;
	 * an extension mapping to an already-added language still counts towards the cap.
	 */
	private async resolveWorkspaceLanguageIds() {
		if (this.hasResolvedWorkspaceLanguageIds) { return; }
		this.hasResolvedWorkspaceLanguageIds = true;
		const fileExtensions = await this._diagnosticsService.getWorkspaceFileExtensions(this._workspaceContextService.getWorkspace());

		let mappedCount = 0;
		for (const ext of fileExtensions.extensions) {
			// Stop scanning once the cap is reached instead of resolving every remaining extension.
			if (mappedCount >= TOP_LANG_COUNTS) { break; }
			const langId = this._languageDetectionWorkerClient.getLanguageId(ext);
			if (langId) {
				this.workspaceLanguageIds.add(langId);
				mappedCount++;
			}
		}
		this.dirtyBiases = true;
	}

	/**
	 * @returns whether the enablement setting is truthy for `languageId`; an empty
	 * language id is never enabled.
	 */
	public isEnabledForLanguage(languageId: string): boolean {
		return !!languageId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: languageId });
	}

	/**
	 * Returns the per-language bias weights, recomputing them only when one of the
	 * underlying sources changed since the last computation.
	 */
	private getLanguageBiases(): Record<string, number> {
		if (!this.dirtyBiases) { return this.langBiases; }

		const biases: Record<string, number> = {};

		// Give different weight to the biases depending on relevance of source.
		// Sources are processed in this order so key insertion order stays stable.
		const weightedSources: [Iterable<string>, number][] = [
			[this.sessionOpenedLanguageIds, 7],
			[this.workspaceLanguageIds, 5],
			[this.historicalWorkspaceOpenedLanguageIds.keys(), 3],
			[this.historicalGlobalOpenedLanguageIds.keys(), 1],
		];
		for (const [languages, weight] of weightedSources) {
			for (const lang of languages) {
				biases[lang] = (biases[lang] ?? 0) + weight;
			}
		}

		this._logService.trace('Session Languages:', JSON.stringify([...this.sessionOpenedLanguageIds]));
		this._logService.trace('Workspace Languages:', JSON.stringify([...this.workspaceLanguageIds]));
		this._logService.trace('Historical Workspace Opened Languages:', JSON.stringify([...this.historicalWorkspaceOpenedLanguageIds.keys()]));
		this._logService.trace('Historical Globally Opened Languages:', JSON.stringify([...this.historicalGlobalOpenedLanguageIds.keys()]));
		this._logService.trace('Computed Language Detection Biases:', JSON.stringify(biases));
		this.dirtyBiases = false;
		this.langBiases = biases;
		return biases;
	}

	/**
	 * Detects the language of `resource` through the worker client.
	 *
	 * @param resource The resource whose language should be detected.
	 * @param supportedLangs Optional list forwarded unchanged to the worker client.
	 * @returns The detected language id, or `undefined` when none was determined.
	 */
	async detectLanguage(resource: URI, supportedLangs?: string[]): Promise<string | undefined> {
		const useHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.historyBasedEnablementConfig);
		const preferHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.preferHistoryConfig);
		if (useHistory) {
			await this.resolveWorkspaceLanguageIds();
		}
		// Biases are only sent when history-based detection is enabled.
		const biases = useHistory ? this.getLanguageBiases() : undefined;
		return this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory, supportedLangs);
	}

	// TODO: explore using the history service or something similar to provide this list of opened editors
	// so this service can support delayed instantiation. This may be tricky since it seems the IHistoryService
	// only gives history for a workspace... where this takes advantage of history at a global level as well.
	/**
	 * Restores both opened-language histories from storage and starts recording
	 * the language of every newly active editor.
	 */
	private initEditorOpenedListeners(storageService: IStorageService) {
		// Each history is restored independently so that one unreadable entry does not block the other.
		try {
			const globalLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.globalOpenedLanguagesStorageKey, StorageScope.PROFILE, '[]'));
			this.historicalGlobalOpenedLanguageIds.fromJSON(globalLangHistoryData);
		} catch (e) { console.error(e); }

		try {
			const workspaceLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, StorageScope.WORKSPACE, '[]'));
			this.historicalWorkspaceOpenedLanguageIds.fromJSON(workspaceLangHistoryData);
		} catch (e) { console.error(e); }

		this._register(this._editorService.onDidActiveEditorChange(() => {
			const activeLanguage = this._editorService.activeTextEditorLanguageId;
			// Editors whose resource uses the untitled scheme are not recorded.
			if (activeLanguage && this._editorService.activeEditor?.resource?.scheme !== Schemas.untitled) {
				this.sessionOpenedLanguageIds.add(activeLanguage);
				this.historicalGlobalOpenedLanguageIds.set(activeLanguage, true);
				this.historicalWorkspaceOpenedLanguageIds.set(activeLanguage, true);
				storageService.store(LanguageDetectionService.globalOpenedLanguagesStorageKey, JSON.stringify(this.historicalGlobalOpenedLanguageIds.toJSON()), StorageScope.PROFILE, StorageTarget.MACHINE);
				storageService.store(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, JSON.stringify(this.historicalWorkspaceOpenedLanguageIds.toJSON()), StorageScope.WORKSPACE, StorageTarget.MACHINE);
				this.dirtyBiases = true;
			}
		}));
	}
}
179
180
/**
 * Main-thread side of the language detection web worker.
 *
 * Creates the worker lazily on the first detection that needs it, serves the
 * worker's host callbacks (asset URIs, language id lookup, telemetry) and
 * reports how long each detection took.
 */
export class LanguageDetectionWorkerClient extends Disposable {
	/** Worker handles; `undefined` until the worker is first needed. */
	private worker: {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} | undefined;

	constructor(
		private readonly _modelService: IModelService,
		private readonly _languageService: ILanguageService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _indexJsUri: string,
		private readonly _modelJsonUri: string,
		private readonly _weightsUri: string,
		private readonly _regexpModelUri: string,
	) {
		super();
	}

	/**
	 * Returns the worker handles, creating the web worker, its host channel and
	 * the text model sync client on first use. Both created objects are
	 * registered on this instance for disposal.
	 */
	private _getOrCreateLanguageDetectionWorker(): {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} {
		if (!this.worker) {
			const workerClient = this._register(createWebWorker<ILanguageDetectionWorker>(
				FileAccess.asBrowserUri('vs/workbench/services/languageDetection/browser/languageDetectionWebWorkerMain.js'),
				'LanguageDetectionWorker'
			));
			// Route the worker's host requests to the matching methods on this client.
			LanguageDetectionWorkerHost.setChannel(workerClient, {
				$getIndexJsUri: async () => this.getIndexJsUri(),
				$getLanguageId: async (languageIdOrExt) => this.getLanguageId(languageIdOrExt),
				$sendTelemetryEvent: async (languages, confidences, timeSpent) => this.sendTelemetryEvent(languages, confidences, timeSpent),
				$getRegexpModelUri: async () => this.getRegexpModelUri(),
				$getModelJsonUri: async () => this.getModelJsonUri(),
				$getWeightsUri: async () => this.getWeightsUri(),
			});
			const workerTextModelSyncClient = this._register(WorkerTextModelSyncClient.create(workerClient, this._modelService));
			this.worker = { workerClient, workerTextModelSyncClient };
		}
		return this.worker;
	}

	/**
	 * Asks the language service to guess a language id for `uri`.
	 *
	 * @returns The guess, or `undefined` when there is none or it is `'unknown'`.
	 */
	private _guessLanguageIdByUri(uri: URI): string | undefined {
		const guess = this._languageService.guessLanguageIdByFilepathOrFirstLine(uri);
		if (guess && guess !== 'unknown') {
			return guess;
		}
		return undefined;
	}

	/** @returns The URI string of the detection library entry point given at construction. */
	async getIndexJsUri(): Promise<string> {
		return this._indexJsUri;
	}

	/**
	 * Resolves a registered language id or a file extension to a language id.
	 *
	 * @param languageIdOrExt A language id, a file extension, or `undefined`.
	 * @returns `languageIdOrExt` itself when it is a registered language id;
	 * otherwise the language guessed for a file named `file.<languageIdOrExt>`,
	 * or `undefined` when the input is empty or no usable guess exists.
	 */
	getLanguageId(languageIdOrExt: string | undefined): string | undefined {
		if (!languageIdOrExt) {
			return undefined;
		}
		if (this._languageService.isRegisteredLanguageId(languageIdOrExt)) {
			return languageIdOrExt;
		}
		// The helper already maps missing and 'unknown' guesses to undefined.
		return this._guessLanguageIdByUri(URI.file(`file.${languageIdOrExt}`));
	}

	/** @returns The URI string of the model JSON given at construction. */
	async getModelJsonUri(): Promise<string> {
		return this._modelJsonUri;
	}

	/** @returns The URI string of the model weights given at construction. */
	async getWeightsUri(): Promise<string> {
		return this._weightsUri;
	}

	/** @returns The URI string of the regexp model given at construction. */
	async getRegexpModelUri(): Promise<string> {
		return this._regexpModelUri;
	}

	/**
	 * Publishes the detection statistics event, with `languages` and
	 * `confidences` each joined into a comma-separated string.
	 */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		this._telemetryService.publicLog2<ILanguageDetectionStats, LanguageDetectionStatsClassification>(LanguageDetectionStatsId, {
			languages: languages.join(','),
			confidences: confidences.join(','),
			timeSpent
		});
	}

	/**
	 * Detects the language of `resource`.
	 *
	 * A guess from the language service based on the resource is returned
	 * directly when available; in that case the worker is not involved and no
	 * perf event is logged. Otherwise the resource is synced to the worker, the
	 * worker is asked to detect, and the elapsed time plus the outcome are
	 * reported through telemetry.
	 *
	 * @param resource The resource to inspect.
	 * @param langBiases Optional per-language weights forwarded to the worker.
	 * @param preferHistory Forwarded to the worker unchanged.
	 * @param supportedLangs Optional list forwarded to the worker unchanged.
	 * @returns The detected language id, or `undefined` when the worker's answer
	 * does not resolve to a known language.
	 */
	public async detectLanguage(resource: URI, langBiases: Record<string, number> | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise<string | undefined> {
		const startTime = Date.now();
		const quickGuess = this._guessLanguageIdByUri(resource);
		if (quickGuess) {
			return quickGuess;
		}

		const { workerClient, workerTextModelSyncClient } = this._getOrCreateLanguageDetectionWorker();
		workerTextModelSyncClient.ensureSyncedResources([resource]);
		const modelId = await workerClient.proxy.$detectLanguage(resource.toString(), langBiases, preferHistory, supportedLangs);
		const languageId = this.getLanguageId(modelId);

		// Named differently from the imported LanguageDetectionStatsId so the two events cannot be confused.
		const languageDetectionPerfEventId = 'automaticlanguagedetection.perf';

		interface ILanguageDetectionPerf {
			timeSpent: number;
			detection: string;
		}

		type LanguageDetectionPerfClassification = {
			owner: 'TylerLeonhardt';
			comment: 'Helps understand how effective language detection and how long it takes to run';
			timeSpent: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The time it took to run language detection' };
			detection: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The language that was detected' };
		};

		this._telemetryService.publicLog2<ILanguageDetectionPerf, LanguageDetectionPerfClassification>(languageDetectionPerfEventId, {
			timeSpent: Date.now() - startTime,
			detection: languageId || 'unknown',
		});

		return languageId;
	}
}
301
302
// Registered as an eager singleton for now, until keeping track of history is handled better.
registerSingleton(
	ILanguageDetectionService,
	LanguageDetectionService,
	InstantiationType.Eager
);
304
305