Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts
5222 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Disposable } from '../../../../base/common/lifecycle.js';
7
import { ILanguageDetectionService, ILanguageDetectionStats, LanguageDetectionStatsClassification, LanguageDetectionStatsId } from '../common/languageDetectionWorkerService.js';
8
import { AppResourcePath, FileAccess, nodeModulesAsarPath, nodeModulesPath, Schemas } from '../../../../base/common/network.js';
9
import { IWorkbenchEnvironmentService } from '../../environment/common/environmentService.js';
10
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
11
import { ILanguageService } from '../../../../editor/common/languages/language.js';
12
import { URI } from '../../../../base/common/uri.js';
13
import { isWeb } from '../../../../base/common/platform.js';
14
import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
15
import { IModelService } from '../../../../editor/common/services/model.js';
16
import { IWebWorkerClient } from '../../../../base/common/worker/webWorker.js';
17
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry.js';
18
import { IDiagnosticsService } from '../../../../platform/diagnostics/common/diagnostics.js';
19
import { IWorkspaceContextService } from '../../../../platform/workspace/common/workspace.js';
20
import { IEditorService } from '../../editor/common/editorService.js';
21
import { IStorageService, StorageScope, StorageTarget } from '../../../../platform/storage/common/storage.js';
22
import { LRUCache } from '../../../../base/common/map.js';
23
import { ILogService } from '../../../../platform/log/common/log.js';
24
import { canASAR } from '../../../../amdX.js';
25
import { WebWorkerDescriptor } from '../../../../platform/webWorker/browser/webWorkerDescriptor.js';
26
import { IWebWorkerService } from '../../../../platform/webWorker/browser/webWorkerService.js';
27
import { WorkerTextModelSyncClient } from '../../../../editor/common/services/textModelSync/textModelSync.impl.js';
28
import { ILanguageDetectionWorker, LanguageDetectionWorkerHost } from './languageDetectionWorker.protocol.js';
29
30
/**
 * Shared cap used by the language detection service: it is the capacity of
 * both "historically opened languages" LRU caches and the maximum number of
 * workspace file extensions that are turned into language ids.
 */
const TOP_LANG_COUNTS = 12;

// Locations of the neural-model based detection package. The `Asar` variant is
// selected by the service when `canASAR` is set, the product is built and we are
// not running on web.
const moduleLocation: AppResourcePath = `${nodeModulesPath}/@vscode/vscode-languagedetection`;
const moduleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/@vscode/vscode-languagedetection`;

// Locations of the regexp based detection package, with the same plain/ASAR split.
const regexpModuleLocation: AppResourcePath = `${nodeModulesPath}/vscode-regexp-languagedetection`;
const regexpModuleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/vscode-regexp-languagedetection`;
36
37
/**
 * Workbench implementation of {@link ILanguageDetectionService}.
 *
 * The actual detection is delegated to a {@link LanguageDetectionWorkerClient}.
 * This class contributes configuration lookups and, when history based
 * detection is enabled, a table of per-language biases computed from:
 *  - languages of editors activated in this session,
 *  - languages derived from file extensions found in the workspace,
 *  - languages remembered in workspace-scoped storage,
 *  - languages remembered in profile-scoped storage.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	static readonly enablementSettingKey = 'workbench.editor.languageDetection';
	static readonly historyBasedEnablementConfig = 'workbench.editor.historyBasedLanguageDetection';
	static readonly preferHistoryConfig = 'workbench.editor.preferHistoryBasedLanguageDetection';
	static readonly workspaceOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.workspace';
	static readonly globalOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.global';

	_serviceBrand: undefined;

	private readonly _languageDetectionWorkerClient: LanguageDetectionWorkerClient;

	/** Set as soon as workspace extension resolution has been started (see `resolveWorkspaceLanguageIds`). */
	private hasResolvedWorkspaceLanguageIds = false;
	private workspaceLanguageIds = new Set<string>();
	private sessionOpenedLanguageIds = new Set<string>();
	private historicalGlobalOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	private historicalWorkspaceOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	/** When `true`, `langBiases` is stale and is recomputed on the next `getLanguageBiases()` call. */
	private dirtyBiases: boolean = true;
	private langBiases: Record<string, number> = {};

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@ILanguageService languageService: ILanguageService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IDiagnosticsService private readonly _diagnosticsService: IDiagnosticsService,
		@IWorkspaceContextService private readonly _workspaceContextService: IWorkspaceContextService,
		@IModelService modelService: IModelService,
		@IEditorService private readonly _editorService: IEditorService,
		@ITelemetryService telemetryService: ITelemetryService,
		@IStorageService storageService: IStorageService,
		@ILogService private readonly _logService: ILogService,
		@IWebWorkerService webWorkerService: IWebWorkerService,
	) {
		super();

		// Pick the ASAR flavoured module locations only for built, non-web products where ASAR can be used.
		const useAsar = canASAR && this._environmentService.isBuilt && !isWeb;
		this._languageDetectionWorkerClient = this._register(new LanguageDetectionWorkerClient(
			modelService,
			languageService,
			telemetryService,
			webWorkerService,
			// TODO See if it's possible to bundle vscode-languagedetection
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/dist/lib/index.js`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/dist/lib/index.js`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/model/model.json`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/model/model.json`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/model/group1-shard1of1.bin`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/model/group1-shard1of1.bin`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${regexpModuleLocationAsar}/dist/index.js`).toString(true)
				: FileAccess.asBrowserUri(`${regexpModuleLocation}/dist/index.js`).toString(true),
		));

		this.initEditorOpenedListeners(storageService);
	}

	/**
	 * Populates `workspaceLanguageIds` from the file extensions reported by the
	 * diagnostics service for the current workspace. Runs at most once: the
	 * guard flag is set before the asynchronous lookup starts, so later calls
	 * return immediately.
	 *
	 * At most `TOP_LANG_COUNTS` extensions that map to a language id are
	 * considered; iteration stops as soon as that cap is reached.
	 */
	private async resolveWorkspaceLanguageIds() {
		if (this.hasResolvedWorkspaceLanguageIds) { return; }
		this.hasResolvedWorkspaceLanguageIds = true;
		const fileExtensions = await this._diagnosticsService.getWorkspaceFileExtensions(this._workspaceContextService.getWorkspace());

		let count = 0;
		for (const ext of fileExtensions.extensions) {
			// Stop early once the cap is hit instead of resolving language ids
			// for extensions whose result would be discarded anyway.
			if (count >= TOP_LANG_COUNTS) { break; }
			const langId = this._languageDetectionWorkerClient.getLanguageId(ext);
			if (langId) {
				this.workspaceLanguageIds.add(langId);
				count++;
			}
		}
		this.dirtyBiases = true;
	}

	/**
	 * @param languageId The language to check.
	 * @returns `false` for an empty language id, otherwise the value of the
	 * enablement setting resolved with `languageId` as override identifier.
	 */
	public isEnabledForLanguage(languageId: string): boolean {
		return !!languageId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: languageId });
	}

	/**
	 * Returns the language bias table, recomputing it only when it has been
	 * marked dirty. Each source adds its weight for every language it contains,
	 * so a language present in several sources accumulates the sum.
	 */
	private getLanguageBiases(): Record<string, number> {
		if (!this.dirtyBiases) { return this.langBiases; }

		const biases: Record<string, number> = {};
		const addBias = (languages: Iterable<string>, weight: number): void => {
			for (const lang of languages) {
				biases[lang] = (biases[lang] ?? 0) + weight;
			}
		};

		// Give different weight to the biases depending on relevance of source
		addBias(this.sessionOpenedLanguageIds, 7);
		addBias(this.workspaceLanguageIds, 5);
		addBias(this.historicalWorkspaceOpenedLanguageIds.keys(), 3);
		addBias(this.historicalGlobalOpenedLanguageIds.keys(), 1);

		this._logService.trace('Session Languages:', JSON.stringify([...this.sessionOpenedLanguageIds]));
		this._logService.trace('Workspace Languages:', JSON.stringify([...this.workspaceLanguageIds]));
		this._logService.trace('Historical Workspace Opened Languages:', JSON.stringify([...this.historicalWorkspaceOpenedLanguageIds.keys()]));
		this._logService.trace('Historical Globally Opened Languages:', JSON.stringify([...this.historicalGlobalOpenedLanguageIds.keys()]));
		this._logService.trace('Computed Language Detection Biases:', JSON.stringify(biases));
		this.dirtyBiases = false;
		this.langBiases = biases;
		return biases;
	}

	/**
	 * Detects the language of `resource` through the worker client.
	 *
	 * @param resource The resource whose language should be detected.
	 * @param supportedLangs Optional list forwarded unchanged to the worker client.
	 * @returns The detected language id, or `undefined` when none was found.
	 */
	async detectLanguage(resource: URI, supportedLangs?: string[]): Promise<string | undefined> {
		// The history setting is only ever used as an on/off flag here.
		const useHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.historyBasedEnablementConfig);
		const preferHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.preferHistoryConfig);
		if (useHistory) {
			await this.resolveWorkspaceLanguageIds();
		}
		const biases = useHistory ? this.getLanguageBiases() : undefined;
		return this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory, supportedLangs);
	}

	// TODO: explore using the history service or something similar to provide this list of opened editors
	// so this service can support delayed instantiation. This may be tricky since it seems the IHistoryService
	// only gives history for a workspace... where this takes advantage of history at a global level as well.
	/**
	 * Restores the persisted language histories from storage and starts
	 * recording the language of every newly activated editor (untitled
	 * resources excluded) into the session set, both LRU caches and storage.
	 */
	private initEditorOpenedListeners(storageService: IStorageService) {
		// Restoring is best effort: malformed stored data is reported and otherwise ignored.
		try {
			const globalLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.globalOpenedLanguagesStorageKey, StorageScope.PROFILE, '[]'));
			this.historicalGlobalOpenedLanguageIds.fromJSON(globalLangHistoryData);
		} catch (e) { console.error(e); }

		try {
			const workspaceLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, StorageScope.WORKSPACE, '[]'));
			this.historicalWorkspaceOpenedLanguageIds.fromJSON(workspaceLangHistoryData);
		} catch (e) { console.error(e); }

		this._register(this._editorService.onDidActiveEditorChange(() => {
			const activeLanguage = this._editorService.activeTextEditorLanguageId;
			if (activeLanguage && this._editorService.activeEditor?.resource?.scheme !== Schemas.untitled) {
				this.sessionOpenedLanguageIds.add(activeLanguage);
				this.historicalGlobalOpenedLanguageIds.set(activeLanguage, true);
				this.historicalWorkspaceOpenedLanguageIds.set(activeLanguage, true);
				storageService.store(LanguageDetectionService.globalOpenedLanguagesStorageKey, JSON.stringify(this.historicalGlobalOpenedLanguageIds.toJSON()), StorageScope.PROFILE, StorageTarget.MACHINE);
				storageService.store(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, JSON.stringify(this.historicalWorkspaceOpenedLanguageIds.toJSON()), StorageScope.WORKSPACE, StorageTarget.MACHINE);
				this.dirtyBiases = true;
			}
		}));
	}
}
182
183
/**
 * Client side of the language detection web worker.
 *
 * The worker is created lazily on the first detection that cannot be answered
 * from the resource's path or first line. The client also acts as the worker's
 * host: it answers the worker's requests for module/model URIs, language id
 * lookups and telemetry.
 */
export class LanguageDetectionWorkerClient extends Disposable {
	private worker: {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} | undefined;

	constructor(
		private readonly _modelService: IModelService,
		private readonly _languageService: ILanguageService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _webWorkerService: IWebWorkerService,
		private readonly _indexJsUri: string,
		private readonly _modelJsonUri: string,
		private readonly _weightsUri: string,
		private readonly _regexpModelUri: string,
	) {
		super();
	}

	/**
	 * Returns the worker client and its text model sync client, creating and
	 * registering both (and wiring up the host channel) on first use.
	 */
	private _getOrCreateLanguageDetectionWorker(): {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} {
		if (!this.worker) {
			const workerClient = this._register(this._webWorkerService.createWorkerClient<ILanguageDetectionWorker>(
				new WebWorkerDescriptor({
					esmModuleLocation: FileAccess.asBrowserUri('vs/workbench/services/languageDetection/browser/languageDetectionWebWorkerMain.js'),
					label: 'LanguageDetectionWorker'
				})
			));
			LanguageDetectionWorkerHost.setChannel(workerClient, {
				$getIndexJsUri: async () => this.getIndexJsUri(),
				$getLanguageId: async (languageIdOrExt) => this.getLanguageId(languageIdOrExt),
				$sendTelemetryEvent: async (languages, confidences, timeSpent) => this.sendTelemetryEvent(languages, confidences, timeSpent),
				$getRegexpModelUri: async () => this.getRegexpModelUri(),
				$getModelJsonUri: async () => this.getModelJsonUri(),
				$getWeightsUri: async () => this.getWeightsUri(),
			});
			const workerTextModelSyncClient = this._register(WorkerTextModelSyncClient.create(workerClient, this._modelService));
			this.worker = { workerClient, workerTextModelSyncClient };
		}
		return this.worker;
	}

	/**
	 * Asks the language service for a guess based on file path or first line.
	 *
	 * @returns The guessed language id, or `undefined` when there is no guess
	 * or the guess is the literal `'unknown'`.
	 */
	private _guessLanguageIdByUri(uri: URI): string | undefined {
		const guess = this._languageService.guessLanguageIdByFilepathOrFirstLine(uri);
		if (guess && guess !== 'unknown') {
			return guess;
		}
		return undefined;
	}

	/** @returns The URI of the detection library entry point passed to the constructor. */
	async getIndexJsUri() {
		return this._indexJsUri;
	}

	/**
	 * Maps a language id or a file extension to a language id.
	 *
	 * @param languageIdOrExt A registered language id, or a file extension
	 * that is appended to `file.` to build a path for guessing.
	 * @returns `languageIdOrExt` itself when it is a registered language id,
	 * otherwise the language guessed for `file.<languageIdOrExt>`, or
	 * `undefined` for empty input or when nothing could be guessed.
	 */
	getLanguageId(languageIdOrExt: string | undefined) {
		if (!languageIdOrExt) {
			return undefined;
		}
		if (this._languageService.isRegisteredLanguageId(languageIdOrExt)) {
			return languageIdOrExt;
		}
		// `_guessLanguageIdByUri` already turns a missing or 'unknown' guess into `undefined`.
		return this._guessLanguageIdByUri(URI.file(`file.${languageIdOrExt}`));
	}

	/** @returns The URI of the model JSON passed to the constructor. */
	async getModelJsonUri() {
		return this._modelJsonUri;
	}

	/** @returns The URI of the model weights passed to the constructor. */
	async getWeightsUri() {
		return this._weightsUri;
	}

	/** @returns The URI of the regexp detection module passed to the constructor. */
	async getRegexpModelUri() {
		return this._regexpModelUri;
	}

	/**
	 * Publishes a detection statistics telemetry event. The language and
	 * confidence lists are each sent as a single comma separated string.
	 */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		this._telemetryService.publicLog2<ILanguageDetectionStats, LanguageDetectionStatsClassification>(LanguageDetectionStatsId, {
			languages: languages.join(','),
			confidences: confidences.join(','),
			timeSpent
		});
	}

	/**
	 * Detects the language of `resource`.
	 *
	 * A guess from the resource's path or first line short-circuits the worker
	 * entirely (and sends no perf telemetry). Otherwise the resource is synced
	 * to the worker, the worker is asked to detect the language, and a perf
	 * telemetry event with elapsed time and result is published.
	 *
	 * @param resource The resource to inspect.
	 * @param langBiases Optional per-language biases forwarded to the worker.
	 * @param preferHistory Forwarded to the worker unchanged.
	 * @param supportedLangs Optional list forwarded to the worker unchanged.
	 * @returns The detected language id, or `undefined` when the worker's
	 * answer cannot be mapped to a language id.
	 */
	public async detectLanguage(resource: URI, langBiases: Record<string, number> | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise<string | undefined> {
		const startTime = Date.now();
		const quickGuess = this._guessLanguageIdByUri(resource);
		if (quickGuess) {
			return quickGuess;
		}

		const { workerClient, workerTextModelSyncClient } = this._getOrCreateLanguageDetectionWorker();
		workerTextModelSyncClient.ensureSyncedResources([resource]);
		const modelId = await workerClient.proxy.$detectLanguage(resource.toString(), langBiases, preferHistory, supportedLangs);
		const languageId = this.getLanguageId(modelId);

		// Deliberately not named `LanguageDetectionStatsId`: that identifier is
		// imported at file level and used by `sendTelemetryEvent` for a different event.
		const perfEventId = 'automaticlanguagedetection.perf';

		interface ILanguageDetectionPerf {
			timeSpent: number;
			detection: string;
		}

		type LanguageDetectionPerfClassification = {
			owner: 'TylerLeonhardt';
			comment: 'Helps understand how effective language detection and how long it takes to run';
			timeSpent: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The time it took to run language detection' };
			detection: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The language that was detected' };
		};

		this._telemetryService.publicLog2<ILanguageDetectionPerf, LanguageDetectionPerfClassification>(perfEventId, {
			timeSpent: Date.now() - startTime,
			detection: languageId || 'unknown',
		});

		return languageId;
	}
}
307
308
// Registered eagerly for now: the service listens for active editor changes
// from construction to build its language history, so it cannot yet be
// instantiated lazily. Revisit once history tracking is handled better.
registerSingleton(
	ILanguageDetectionService,
	LanguageDetectionService,
	InstantiationType.Eager,
);
310
311