Path: blob/main/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts
5222 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Disposable } from '../../../../base/common/lifecycle.js';
import { ILanguageDetectionService, ILanguageDetectionStats, LanguageDetectionStatsClassification, LanguageDetectionStatsId } from '../common/languageDetectionWorkerService.js';
import { AppResourcePath, FileAccess, nodeModulesAsarPath, nodeModulesPath, Schemas } from '../../../../base/common/network.js';
import { IWorkbenchEnvironmentService } from '../../environment/common/environmentService.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { ILanguageService } from '../../../../editor/common/languages/language.js';
import { URI } from '../../../../base/common/uri.js';
import { isWeb } from '../../../../base/common/platform.js';
import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
import { IModelService } from '../../../../editor/common/services/model.js';
import { IWebWorkerClient } from '../../../../base/common/worker/webWorker.js';
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry.js';
import { IDiagnosticsService } from '../../../../platform/diagnostics/common/diagnostics.js';
import { IWorkspaceContextService } from '../../../../platform/workspace/common/workspace.js';
import { IEditorService } from '../../editor/common/editorService.js';
import { IStorageService, StorageScope, StorageTarget } from '../../../../platform/storage/common/storage.js';
import { LRUCache } from '../../../../base/common/map.js';
import { ILogService } from '../../../../platform/log/common/log.js';
import { canASAR } from '../../../../amdX.js';
import { WebWorkerDescriptor } from '../../../../platform/webWorker/browser/webWorkerDescriptor.js';
import { IWebWorkerService } from '../../../../platform/webWorker/browser/webWorkerService.js';
import { WorkerTextModelSyncClient } from '../../../../editor/common/services/textModelSync/textModelSync.impl.js';
import { ILanguageDetectionWorker, LanguageDetectionWorkerHost } from './languageDetectionWorker.protocol.js';

/** Capacity of the language-history LRU caches and cap on the workspace extension scan. */
const TOP_LANG_COUNTS = 12;

const regexpModuleLocation: AppResourcePath = `${nodeModulesPath}/vscode-regexp-languagedetection`;
const regexpModuleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/vscode-regexp-languagedetection`;
const moduleLocation: AppResourcePath = `${nodeModulesPath}/@vscode/vscode-languagedetection`;
const moduleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/@vscode/vscode-languagedetection`;

/**
 * Workbench implementation of {@link ILanguageDetectionService}.
 *
 * Delegates the actual detection to a {@link LanguageDetectionWorkerClient} and, when
 * history-based detection is enabled, supplies it with per-language bias weights derived
 * from the languages opened in this session, found in the workspace, and stored from
 * earlier editor activations.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	static readonly enablementSettingKey = 'workbench.editor.languageDetection';
	static readonly historyBasedEnablementConfig = 'workbench.editor.historyBasedLanguageDetection';
	static readonly preferHistoryConfig = 'workbench.editor.preferHistoryBasedLanguageDetection';
	static readonly workspaceOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.workspace';
	static readonly globalOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.global';

	_serviceBrand: undefined;

	private _languageDetectionWorkerClient: LanguageDetectionWorkerClient;

	private hasResolvedWorkspaceLanguageIds = false;
	private workspaceLanguageIds = new Set<string>();
	private sessionOpenedLanguageIds = new Set<string>();
	private historicalGlobalOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	private historicalWorkspaceOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	// True whenever one of the sources above changed and `langBiases` must be recomputed.
	private dirtyBiases: boolean = true;
	private langBiases: Record<string, number> = {};

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@ILanguageService languageService: ILanguageService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IDiagnosticsService private readonly _diagnosticsService: IDiagnosticsService,
		@IWorkspaceContextService private readonly _workspaceContextService: IWorkspaceContextService,
		@IModelService modelService: IModelService,
		@IEditorService private readonly _editorService: IEditorService,
		@ITelemetryService telemetryService: ITelemetryService,
		@IStorageService storageService: IStorageService,
		@ILogService private readonly _logService: ILogService,
		@IWebWorkerService webWorkerService: IWebWorkerService,
	) {
		super();

		// Resolve the detection modules from the ASAR location only in built, non-web environments where ASAR is available.
		const useAsar = canASAR && this._environmentService.isBuilt && !isWeb;
		this._languageDetectionWorkerClient = this._register(new LanguageDetectionWorkerClient(
			modelService,
			languageService,
			telemetryService,
			webWorkerService,
			// TODO See if it's possible to bundle vscode-languagedetection
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/dist/lib/index.js`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/dist/lib/index.js`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/model/model.json`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/model/model.json`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/model/group1-shard1of1.bin`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/model/group1-shard1of1.bin`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${regexpModuleLocationAsar}/dist/index.js`).toString(true)
				: FileAccess.asBrowserUri(`${regexpModuleLocation}/dist/index.js`).toString(true),
		));

		this.initEditorOpenedListeners(storageService);
	}

	/**
	 * Populates `workspaceLanguageIds` from the file extensions reported for the current
	 * workspace. Only the first call does any work; at most `TOP_LANG_COUNTS` extensions
	 * that map to a language id are recorded.
	 */
	private async resolveWorkspaceLanguageIds() {
		if (this.hasResolvedWorkspaceLanguageIds) { return; }
		// The flag is set before awaiting, so calls made while the lookup is pending return immediately.
		this.hasResolvedWorkspaceLanguageIds = true;
		const fileExtensions = await this._diagnosticsService.getWorkspaceFileExtensions(this._workspaceContextService.getWorkspace());

		let count = 0;
		for (const ext of fileExtensions.extensions) {
			// Stop as soon as the cap is reached instead of resolving the remaining extensions for nothing.
			if (count >= TOP_LANG_COUNTS) { break; }
			const langId = this._languageDetectionWorkerClient.getLanguageId(ext);
			if (langId) {
				this.workspaceLanguageIds.add(langId);
				count++;
			}
		}
		this.dirtyBiases = true;
	}

	/**
	 * Returns whether the language detection setting is enabled for `languageId`.
	 * An empty language id is never enabled.
	 */
	public isEnabledForLanguage(languageId: string): boolean {
		return !!languageId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: languageId });
	}


	/**
	 * Returns the per-language bias weights, recomputing them only when `dirtyBiases` is set.
	 * A language accumulates weight from every source it appears in.
	 */
	private getLanguageBiases(): Record<string, number> {
		if (!this.dirtyBiases) { return this.langBiases; }

		const biases: Record<string, number> = {};

		// Give different weight to the biases depending on relevance of source
		this.sessionOpenedLanguageIds.forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 7);

		this.workspaceLanguageIds.forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 5);

		[...this.historicalWorkspaceOpenedLanguageIds.keys()].forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 3);

		[...this.historicalGlobalOpenedLanguageIds.keys()].forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 1);

		this._logService.trace('Session Languages:', JSON.stringify([...this.sessionOpenedLanguageIds]));
		this._logService.trace('Workspace Languages:', JSON.stringify([...this.workspaceLanguageIds]));
		this._logService.trace('Historical Workspace Opened Languages:', JSON.stringify([...this.historicalWorkspaceOpenedLanguageIds.keys()]));
		this._logService.trace('Historical Globally Opened Languages:', JSON.stringify([...this.historicalGlobalOpenedLanguageIds.keys()]));
		this._logService.trace('Computed Language Detection Biases:', JSON.stringify(biases));
		this.dirtyBiases = false;
		this.langBiases = biases;
		return biases;
	}

	/**
	 * Detects the language of `resource` through the worker client.
	 *
	 * @param resource The resource whose language should be detected.
	 * @param supportedLangs Optional list forwarded unchanged to the worker client.
	 * @returns The detected language id, or `undefined` when none was determined.
	 */
	async detectLanguage(resource: URI, supportedLangs?: string[]): Promise<string | undefined> {
		// This setting is only ever used as an on/off switch here, so read it as a boolean.
		const useHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.historyBasedEnablementConfig);
		const preferHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.preferHistoryConfig);
		if (useHistory) {
			await this.resolveWorkspaceLanguageIds();
		}
		const biases = useHistory ? this.getLanguageBiases() : undefined;
		return this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory, supportedLangs);
	}

	// TODO: explore using the history service or something similar to provide this list of opened editors
	// so this service can support delayed instantiation. This may be tricky since it seems the IHistoryService
	// only gives history for a workspace... where this takes advantage of history at a global level as well.
	/**
	 * Restores the stored language history from `storageService` and starts recording the
	 * language of every newly activated editor (editors with the `untitled` scheme are skipped).
	 */
	private initEditorOpenedListeners(storageService: IStorageService) {
		// Each history is restored independently so a corrupt entry in one scope does not affect the other.
		try {
			const globalLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.globalOpenedLanguagesStorageKey, StorageScope.PROFILE, '[]'));
			this.historicalGlobalOpenedLanguageIds.fromJSON(globalLangHistoryData);
		} catch (e) { console.error(e); }

		try {
			const workspaceLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, StorageScope.WORKSPACE, '[]'));
			this.historicalWorkspaceOpenedLanguageIds.fromJSON(workspaceLangHistoryData);
		} catch (e) { console.error(e); }

		this._register(this._editorService.onDidActiveEditorChange(() => {
			const activeLanguage = this._editorService.activeTextEditorLanguageId;
			if (activeLanguage && this._editorService.activeEditor?.resource?.scheme !== Schemas.untitled) {
				this.sessionOpenedLanguageIds.add(activeLanguage);
				this.historicalGlobalOpenedLanguageIds.set(activeLanguage, true);
				this.historicalWorkspaceOpenedLanguageIds.set(activeLanguage, true);
				storageService.store(LanguageDetectionService.globalOpenedLanguagesStorageKey, JSON.stringify(this.historicalGlobalOpenedLanguageIds.toJSON()), StorageScope.PROFILE, StorageTarget.MACHINE);
				storageService.store(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, JSON.stringify(this.historicalWorkspaceOpenedLanguageIds.toJSON()), StorageScope.WORKSPACE, StorageTarget.MACHINE);
				this.dirtyBiases = true;
			}
		}));
	}
}

/**
 * Client side of the language detection web worker.
 *
 * The worker is created on first use. The client also answers the worker's host requests
 * (module/model URIs, language id lookup, telemetry) through {@link LanguageDetectionWorkerHost}.
 */
export class LanguageDetectionWorkerClient extends Disposable {
	private worker: {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} | undefined;

	constructor(
		private readonly _modelService: IModelService,
		private readonly _languageService: ILanguageService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _webWorkerService: IWebWorkerService,
		private readonly _indexJsUri: string,
		private readonly _modelJsonUri: string,
		private readonly _weightsUri: string,
		private readonly _regexpModelUri: string,
	) {
		super();
	}

	/**
	 * Returns the worker client and its text model sync client, creating and registering
	 * both on the first call.
	 */
	private _getOrCreateLanguageDetectionWorker(): {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} {
		if (!this.worker) {
			const workerClient = this._register(this._webWorkerService.createWorkerClient<ILanguageDetectionWorker>(
				new WebWorkerDescriptor({
					esmModuleLocation: FileAccess.asBrowserUri('vs/workbench/services/languageDetection/browser/languageDetectionWebWorkerMain.js'),
					label: 'LanguageDetectionWorker'
				})
			));
			// Host-side handlers the worker can call back into.
			LanguageDetectionWorkerHost.setChannel(workerClient, {
				$getIndexJsUri: async () => this.getIndexJsUri(),
				$getLanguageId: async (languageIdOrExt) => this.getLanguageId(languageIdOrExt),
				$sendTelemetryEvent: async (languages, confidences, timeSpent) => this.sendTelemetryEvent(languages, confidences, timeSpent),
				$getRegexpModelUri: async () => this.getRegexpModelUri(),
				$getModelJsonUri: async () => this.getModelJsonUri(),
				$getWeightsUri: async () => this.getWeightsUri(),
			});
			const workerTextModelSyncClient = this._register(WorkerTextModelSyncClient.create(workerClient, this._modelService));
			this.worker = { workerClient, workerTextModelSyncClient };
		}
		return this.worker;
	}

	/**
	 * Asks the language service to guess a language id for `uri`.
	 * Returns `undefined` when there is no guess or the guess is `'unknown'`.
	 */
	private _guessLanguageIdByUri(uri: URI): string | undefined {
		const guess = this._languageService.guessLanguageIdByFilepathOrFirstLine(uri);
		if (guess && guess !== 'unknown') {
			return guess;
		}
		return undefined;
	}

	async getIndexJsUri() {
		return this._indexJsUri;
	}

	/**
	 * Maps a registered language id or a file extension to a language id.
	 *
	 * @param languageIdOrExt A language id, or an extension without the leading dot.
	 * @returns The input when it is a registered language id; otherwise the language guessed
	 * for a file named `file.<languageIdOrExt>`, or `undefined` when nothing matches.
	 */
	getLanguageId(languageIdOrExt: string | undefined) {
		if (!languageIdOrExt) {
			return undefined;
		}
		if (this._languageService.isRegisteredLanguageId(languageIdOrExt)) {
			return languageIdOrExt;
		}
		// `_guessLanguageIdByUri` already maps a missing or 'unknown' guess to undefined.
		return this._guessLanguageIdByUri(URI.file(`file.${languageIdOrExt}`));
	}

	async getModelJsonUri() {
		return this._modelJsonUri;
	}

	async getWeightsUri() {
		return this._weightsUri;
	}

	async getRegexpModelUri() {
		return this._regexpModelUri;
	}

	/** Publishes a language detection stats event with the given values joined by commas. */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		this._telemetryService.publicLog2<ILanguageDetectionStats, LanguageDetectionStatsClassification>(LanguageDetectionStatsId, {
			languages: languages.join(','),
			confidences: confidences.join(','),
			timeSpent
		});
	}

	/**
	 * Detects the language of `resource`.
	 *
	 * A guess based on the URI is returned directly when available; otherwise the resource
	 * is synced to the worker and the worker's answer is mapped to a language id. A perf
	 * telemetry event is published only on the worker path.
	 *
	 * @param resource The resource to inspect.
	 * @param langBiases Optional per-language weights forwarded to the worker.
	 * @param preferHistory Forwarded to the worker.
	 * @param supportedLangs Optional list forwarded to the worker.
	 * @returns The detected language id, or `undefined` when none was determined.
	 */
	public async detectLanguage(resource: URI, langBiases: Record<string, number> | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise<string | undefined> {
		const startTime = Date.now();
		const quickGuess = this._guessLanguageIdByUri(resource);
		if (quickGuess) {
			return quickGuess;
		}

		const { workerClient, workerTextModelSyncClient } = this._getOrCreateLanguageDetectionWorker();
		workerTextModelSyncClient.ensureSyncedResources([resource]);
		const modelId = await workerClient.proxy.$detectLanguage(resource.toString(), langBiases, preferHistory, supportedLangs);
		const languageId = this.getLanguageId(modelId);

		// Named distinctly from the imported `LanguageDetectionStatsId` to avoid shadowing it.
		const LanguageDetectionPerfId = 'automaticlanguagedetection.perf';

		interface ILanguageDetectionPerf {
			timeSpent: number;
			detection: string;
		}

		type LanguageDetectionPerfClassification = {
			owner: 'TylerLeonhardt';
			comment: 'Helps understand how effective language detection and how long it takes to run';
			timeSpent: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The time it took to run language detection' };
			detection: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The language that was detected' };
		};

		this._telemetryService.publicLog2<ILanguageDetectionPerf, LanguageDetectionPerfClassification>(LanguageDetectionPerfId, {
			timeSpent: Date.now() - startTime,
			detection: languageId || 'unknown',
		});

		return languageId;
	}
}

// For now we use Eager until we handle keeping track of history better.
registerSingleton(ILanguageDetectionService, LanguageDetectionService, InstantiationType.Eager);