// Path: blob/main/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts
// 3296 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { Disposable } from '../../../../base/common/lifecycle.js';
import { ILanguageDetectionService, ILanguageDetectionStats, LanguageDetectionStatsClassification, LanguageDetectionStatsId } from '../common/languageDetectionWorkerService.js';
import { AppResourcePath, FileAccess, nodeModulesAsarPath, nodeModulesPath, Schemas } from '../../../../base/common/network.js';
import { IWorkbenchEnvironmentService } from '../../environment/common/environmentService.js';
import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
import { ILanguageService } from '../../../../editor/common/languages/language.js';
import { URI } from '../../../../base/common/uri.js';
import { isWeb } from '../../../../base/common/platform.js';
import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
import { IModelService } from '../../../../editor/common/services/model.js';
import { IWebWorkerClient } from '../../../../base/common/worker/webWorker.js';
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry.js';
import { IDiagnosticsService } from '../../../../platform/diagnostics/common/diagnostics.js';
import { IWorkspaceContextService } from '../../../../platform/workspace/common/workspace.js';
import { IEditorService } from '../../editor/common/editorService.js';
import { IStorageService, StorageScope, StorageTarget } from '../../../../platform/storage/common/storage.js';
import { LRUCache } from '../../../../base/common/map.js';
import { ILogService } from '../../../../platform/log/common/log.js';
import { canASAR } from '../../../../amdX.js';
import { createWebWorker } from '../../../../base/browser/webWorkerFactory.js';
import { WorkerTextModelSyncClient } from '../../../../editor/common/services/textModelSync/textModelSync.impl.js';
import { ILanguageDetectionWorker, LanguageDetectionWorkerHost } from './languageDetectionWorker.protocol.js';

/**
 * Upper bound used both as the capacity of the two history LRU caches and as the
 * maximum number of workspace file extensions that are turned into language ids.
 */
const TOP_LANG_COUNTS = 12;

// Locations of the two detection packages, in the plain and the ASAR-packed node_modules folder.
const regexpModuleLocation: AppResourcePath = `${nodeModulesPath}/vscode-regexp-languagedetection`;
const regexpModuleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/vscode-regexp-languagedetection`;
const moduleLocation: AppResourcePath = `${nodeModulesPath}/@vscode/vscode-languagedetection`;
const moduleLocationAsar: AppResourcePath = `${nodeModulesAsarPath}/@vscode/vscode-languagedetection`;

/**
 * Workbench-side language detection service.
 *
 * It owns a {@link LanguageDetectionWorkerClient} that performs the actual detection, and
 * maintains several sets of "recently seen" language ids (current session, workspace file
 * extensions, persisted workspace history, persisted global history) that are combined into
 * per-language bias weights handed to the worker when history based detection is enabled.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	static readonly enablementSettingKey = 'workbench.editor.languageDetection';
	static readonly historyBasedEnablementConfig = 'workbench.editor.historyBasedLanguageDetection';
	static readonly preferHistoryConfig = 'workbench.editor.preferHistoryBasedLanguageDetection';
	static readonly workspaceOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.workspace';
	static readonly globalOpenedLanguagesStorageKey = 'workbench.editor.languageDetectionOpenedLanguages.global';

	_serviceBrand: undefined;

	private _languageDetectionWorkerClient: LanguageDetectionWorkerClient;

	// Guards resolveWorkspaceLanguageIds() so the workspace is only scanned once.
	private hasResolvedWorkspaceLanguageIds = false;
	private workspaceLanguageIds = new Set<string>();
	private sessionOpenedLanguageIds = new Set<string>();
	private historicalGlobalOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	private historicalWorkspaceOpenedLanguageIds = new LRUCache<string, true>(TOP_LANG_COUNTS);
	// True whenever one of the sets above changed since langBiases was last computed.
	private dirtyBiases = true;
	private langBiases: Record<string, number> = {};

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@ILanguageService languageService: ILanguageService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IDiagnosticsService private readonly _diagnosticsService: IDiagnosticsService,
		@IWorkspaceContextService private readonly _workspaceContextService: IWorkspaceContextService,
		@IModelService modelService: IModelService,
		@IEditorService private readonly _editorService: IEditorService,
		@ITelemetryService telemetryService: ITelemetryService,
		@IStorageService storageService: IStorageService,
		@ILogService private readonly _logService: ILogService
	) {
		super();

		// The ASAR locations are only used for built, non-web products where ASAR is available.
		const useAsar = canASAR && this._environmentService.isBuilt && !isWeb;
		this._languageDetectionWorkerClient = this._register(new LanguageDetectionWorkerClient(
			modelService,
			languageService,
			telemetryService,
			// TODO See if it's possible to bundle vscode-languagedetection
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/dist/lib/index.js`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/dist/lib/index.js`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/model/model.json`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/model/model.json`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${moduleLocationAsar}/model/group1-shard1of1.bin`).toString(true)
				: FileAccess.asBrowserUri(`${moduleLocation}/model/group1-shard1of1.bin`).toString(true),
			useAsar
				? FileAccess.asBrowserUri(`${regexpModuleLocationAsar}/dist/index.js`).toString(true)
				: FileAccess.asBrowserUri(`${regexpModuleLocation}/dist/index.js`).toString(true),
		));

		this.initEditorOpenedListeners(storageService);
	}

	/**
	 * Populates `workspaceLanguageIds` from the file extensions reported by the diagnostics
	 * service for the current workspace. Runs at most once per service instance; at most
	 * `TOP_LANG_COUNTS` extensions that map to a language id are considered.
	 */
	private async resolveWorkspaceLanguageIds(): Promise<void> {
		if (this.hasResolvedWorkspaceLanguageIds) { return; }
		this.hasResolvedWorkspaceLanguageIds = true;
		const fileExtensions = await this._diagnosticsService.getWorkspaceFileExtensions(this._workspaceContextService.getWorkspace());

		let count = 0;
		for (const ext of fileExtensions.extensions) {
			const langId = this._languageDetectionWorkerClient.getLanguageId(ext);
			if (!langId) { continue; }
			this.workspaceLanguageIds.add(langId);
			count++;
			// Stop as soon as the cap is reached instead of scanning the remaining extensions.
			if (count >= TOP_LANG_COUNTS) { break; }
		}
		this.dirtyBiases = true;
	}

	/**
	 * @returns `true` when `languageId` is non-empty and the language detection setting,
	 * read with `languageId` as override identifier, is truthy.
	 */
	public isEnabledForLanguage(languageId: string): boolean {
		return !!languageId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: languageId });
	}


	/**
	 * Returns the per-language bias weights, recomputing them only when one of the
	 * underlying language sets changed since the last call.
	 */
	private getLanguageBiases(): Record<string, number> {
		if (!this.dirtyBiases) { return this.langBiases; }

		const biases: Record<string, number> = {};

		// Give different weight to the biases depending on relevance of source
		this.sessionOpenedLanguageIds.forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 7);

		this.workspaceLanguageIds.forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 5);

		[...this.historicalWorkspaceOpenedLanguageIds.keys()].forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 3);

		[...this.historicalGlobalOpenedLanguageIds.keys()].forEach(lang =>
			biases[lang] = (biases[lang] ?? 0) + 1);

		this._logService.trace('Session Languages:', JSON.stringify([...this.sessionOpenedLanguageIds]));
		this._logService.trace('Workspace Languages:', JSON.stringify([...this.workspaceLanguageIds]));
		this._logService.trace('Historical Workspace Opened Languages:', JSON.stringify([...this.historicalWorkspaceOpenedLanguageIds.keys()]));
		this._logService.trace('Historical Globally Opened Languages:', JSON.stringify([...this.historicalGlobalOpenedLanguageIds.keys()]));
		this._logService.trace('Computed Language Detection Biases:', JSON.stringify(biases));
		this.dirtyBiases = false;
		this.langBiases = biases;
		return biases;
	}

	/**
	 * Detects the language of `resource` by delegating to the worker client.
	 *
	 * @param resource The resource whose language should be detected.
	 * @param supportedLangs Optional list forwarded unchanged to the worker client.
	 * @returns The detected language id, or `undefined` when nothing was detected.
	 */
	async detectLanguage(resource: URI, supportedLangs?: string[]): Promise<string | undefined> {
		// The setting is only ever used as an on/off flag below, so it is read as a boolean.
		const useHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.historyBasedEnablementConfig);
		const preferHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.preferHistoryConfig);
		if (useHistory) {
			await this.resolveWorkspaceLanguageIds();
		}
		const biases = useHistory ? this.getLanguageBiases() : undefined;
		return this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory, supportedLangs);
	}

	// TODO: explore using the history service or something similar to provide this list of opened editors
	// so this service can support delayed instantiation. This may be tricky since it seems the IHistoryService
	// only gives history for a workspace... where this takes advantage of history at a global level as well.
	/**
	 * Restores the persisted global and workspace language histories from storage and
	 * keeps them (plus the session set) up to date whenever the active editor changes.
	 */
	private initEditorOpenedListeners(storageService: IStorageService) {
		// Each restore is wrapped separately so a failure in one does not prevent the other.
		try {
			const globalLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.globalOpenedLanguagesStorageKey, StorageScope.PROFILE, '[]'));
			this.historicalGlobalOpenedLanguageIds.fromJSON(globalLangHistoryData);
		} catch (e) { console.error(e); }

		try {
			const workspaceLangHistoryData = JSON.parse(storageService.get(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, StorageScope.WORKSPACE, '[]'));
			this.historicalWorkspaceOpenedLanguageIds.fromJSON(workspaceLangHistoryData);
		} catch (e) { console.error(e); }

		this._register(this._editorService.onDidActiveEditorChange(() => {
			const activeLanguage = this._editorService.activeTextEditorLanguageId;
			// Editors whose resource uses the untitled scheme are not recorded.
			if (activeLanguage && this._editorService.activeEditor?.resource?.scheme !== Schemas.untitled) {
				this.sessionOpenedLanguageIds.add(activeLanguage);
				this.historicalGlobalOpenedLanguageIds.set(activeLanguage, true);
				this.historicalWorkspaceOpenedLanguageIds.set(activeLanguage, true);
				storageService.store(LanguageDetectionService.globalOpenedLanguagesStorageKey, JSON.stringify(this.historicalGlobalOpenedLanguageIds.toJSON()), StorageScope.PROFILE, StorageTarget.MACHINE);
				storageService.store(LanguageDetectionService.workspaceOpenedLanguagesStorageKey, JSON.stringify(this.historicalWorkspaceOpenedLanguageIds.toJSON()), StorageScope.WORKSPACE, StorageTarget.MACHINE);
				this.dirtyBiases = true;
			}
		}));
	}
}

/**
 * Client for the language detection web worker.
 *
 * The worker is created lazily on the first detection request that cannot be answered
 * from the resource's file path or first line. The client also serves as the worker's
 * host: it answers the worker's requests for module/model URIs, language id lookups and
 * telemetry reporting.
 */
export class LanguageDetectionWorkerClient extends Disposable {
	private worker: {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} | undefined;

	constructor(
		private readonly _modelService: IModelService,
		private readonly _languageService: ILanguageService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _indexJsUri: string,
		private readonly _modelJsonUri: string,
		private readonly _weightsUri: string,
		private readonly _regexpModelUri: string,
	) {
		super();
	}

	/**
	 * Returns the worker client and its text model sync client, creating and registering
	 * both (and wiring up the host channel) on first use.
	 */
	private _getOrCreateLanguageDetectionWorker(): {
		workerClient: IWebWorkerClient<ILanguageDetectionWorker>;
		workerTextModelSyncClient: WorkerTextModelSyncClient;
	} {
		if (!this.worker) {
			const workerClient = this._register(createWebWorker<ILanguageDetectionWorker>(
				FileAccess.asBrowserUri('vs/workbench/services/languageDetection/browser/languageDetectionWebWorkerMain.js'),
				'LanguageDetectionWorker'
			));
			LanguageDetectionWorkerHost.setChannel(workerClient, {
				$getIndexJsUri: async () => this.getIndexJsUri(),
				$getLanguageId: async (languageIdOrExt) => this.getLanguageId(languageIdOrExt),
				$sendTelemetryEvent: async (languages, confidences, timeSpent) => this.sendTelemetryEvent(languages, confidences, timeSpent),
				$getRegexpModelUri: async () => this.getRegexpModelUri(),
				$getModelJsonUri: async () => this.getModelJsonUri(),
				$getWeightsUri: async () => this.getWeightsUri(),
			});
			const workerTextModelSyncClient = this._register(WorkerTextModelSyncClient.create(workerClient, this._modelService));
			this.worker = { workerClient, workerTextModelSyncClient };
		}
		return this.worker;
	}

	/**
	 * Asks the language service for a language id based on the file path or first line.
	 *
	 * @returns The guessed id, or `undefined` when there is no guess or the guess is `'unknown'`.
	 */
	private _guessLanguageIdByUri(uri: URI): string | undefined {
		const guess = this._languageService.guessLanguageIdByFilepathOrFirstLine(uri);
		if (guess && guess !== 'unknown') {
			return guess;
		}
		return undefined;
	}

	async getIndexJsUri() {
		return this._indexJsUri;
	}

	/**
	 * Maps a value that is either a language id or a file extension to a language id.
	 *
	 * @param languageIdOrExt A registered language id, or an extension without the leading dot.
	 * @returns `languageIdOrExt` itself when it is a registered language id; otherwise the
	 * language guessed for a synthetic `file.<languageIdOrExt>` path; `undefined` when the
	 * input is empty or no language can be guessed.
	 */
	getLanguageId(languageIdOrExt: string | undefined): string | undefined {
		if (!languageIdOrExt) {
			return undefined;
		}
		if (this._languageService.isRegisteredLanguageId(languageIdOrExt)) {
			return languageIdOrExt;
		}
		// _guessLanguageIdByUri already maps "no guess" and 'unknown' to undefined.
		return this._guessLanguageIdByUri(URI.file(`file.${languageIdOrExt}`));
	}

	async getModelJsonUri() {
		return this._modelJsonUri;
	}

	async getWeightsUri() {
		return this._weightsUri;
	}

	async getRegexpModelUri() {
		return this._regexpModelUri;
	}

	/**
	 * Reports detection statistics; `languages` and `confidences` are sent as
	 * comma-joined strings.
	 */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		this._telemetryService.publicLog2<ILanguageDetectionStats, LanguageDetectionStatsClassification>(LanguageDetectionStatsId, {
			languages: languages.join(','),
			confidences: confidences.join(','),
			timeSpent
		});
	}

	/**
	 * Detects the language of `resource`.
	 *
	 * A guess based on the file path or first line wins and is returned immediately,
	 * without involving the worker and without logging the perf event. Otherwise the
	 * resource is synced to the worker, the worker is asked to detect the language, and
	 * the elapsed time and outcome are logged.
	 *
	 * @param resource The resource to inspect.
	 * @param langBiases Optional per-language weights forwarded to the worker.
	 * @param preferHistory Forwarded to the worker.
	 * @param supportedLangs Optional list forwarded to the worker.
	 * @returns The detected language id, or `undefined` when the worker's answer does not
	 * map to a language id.
	 */
	public async detectLanguage(resource: URI, langBiases: Record<string, number> | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise<string | undefined> {
		const startTime = Date.now();
		const quickGuess = this._guessLanguageIdByUri(resource);
		if (quickGuess) {
			return quickGuess;
		}

		const { workerClient, workerTextModelSyncClient } = this._getOrCreateLanguageDetectionWorker();
		workerTextModelSyncClient.ensureSyncedResources([resource]);
		const modelId = await workerClient.proxy.$detectLanguage(resource.toString(), langBiases, preferHistory, supportedLangs);
		const languageId = this.getLanguageId(modelId);

		// Named differently from the imported LanguageDetectionStatsId to avoid shadowing it.
		const languageDetectionPerfId = 'automaticlanguagedetection.perf';

		interface ILanguageDetectionPerf {
			timeSpent: number;
			detection: string;
		}

		type LanguageDetectionPerfClassification = {
			owner: 'TylerLeonhardt';
			comment: 'Helps understand how effective language detection and how long it takes to run';
			timeSpent: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The time it took to run language detection' };
			detection: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The language that was detected' };
		};

		this._telemetryService.publicLog2<ILanguageDetectionPerf, LanguageDetectionPerfClassification>(languageDetectionPerfId, {
			timeSpent: Date.now() - startTime,
			detection: languageId || 'unknown',
		});

		return languageId;
	}
}

// For now we use Eager until we handle keeping track of history better.
registerSingleton(ILanguageDetectionService, LanguageDetectionService, InstantiationType.Eager);