Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompt/common/promptCategorizationTaxonomy.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
/**
7
* Domain + Intent + Scope + Time Estimate classification taxonomy.
8
*
9
* Single source of truth for the domain, intent, and scope categories (derived from
10
* clustering analysis) and time estimate dimensions.
11
*/
12
13
// ============================================================================
14
// INTENTS - What action the user wants
15
// ============================================================================
16
17
/**
 * Intent taxonomy: the action the user wants performed.
 *
 * Each key is an intent identifier. Its entry holds the description and the
 * illustrative keywords that are rendered into the intent section of the
 * classification prompt. The `need_info` and `other` entries have no keywords.
 */
export const INTENT_DEFINITIONS = {
	// --- Understanding and locating existing material ---
	explain: {
		description: 'Prompts asking the assistant to explain code, concepts, or technical topics. Includes requests for clarification, summaries, definitions, and step-by-step walkthroughs of implementations or workflows.',
		keywords: ['explanation', 'understanding', 'clarification', 'how-it-works', 'summary', 'definitions', 'step-by-step', 'guidance'],
	},
	find_content: {
		description: 'Prompts requesting the assistant to retrieve, read, or locate files, code references, definitions, and usage patterns within a codebase or project repository.',
		keywords: ['retrieve', 'read', 'file contents', 'search', 'references', 'codebase', 'locate', 'fetch'],
	},
	research: {
		description: 'Prompts requesting the assistant to research and investigate implementation details, usage patterns, and documentation of existing code or systems.',
		keywords: ['research', 'implementation details', 'documentation', 'usage patterns', 'investigation'],
	},
	review: {
		description: 'Prompts requesting code review, validation of implementations against requirements, analysis of code changes and quality, and identification of issues, vulnerabilities, and improvements. Covers both formal review feedback and structural/usage pattern analysis.',
		keywords: ['code review', 'validation', 'compliance', 'correctness', 'code quality', 'vulnerability analysis', 'code changes', 'feedback'],
	},
	generate_docs: {
		description: 'Prompts requesting the assistant to generate documentation, summary reports, and example or sample code.',
		keywords: ['documentation', 'generate', 'summary reports', 'example code', 'technical writing'],
	},

	// --- Diagnosing, operating and running ---
	troubleshoot_debug: {
		description: 'Prompts requesting help diagnosing and resolving failures, errors, bugs, and incidents. Includes troubleshooting build/code errors, root cause analysis, and investigation of test failures and operational incidents.',
		keywords: ['troubleshoot', 'debug', 'failure', 'error', 'root cause', 'fix', 'build errors', 'incidents', 'bugs'],
	},
	git_ops: {
		description: 'Prompts requesting help with Git branch operations including creating, switching, merging, rebasing branches, and resolving merge conflicts.',
		keywords: ['branch', 'merge', 'rebase', 'conflicts', 'commit', 'Git operations'],
	},
	run_code: {
		description: 'Prompts requesting the assistant to run, execute, or initiate code, scripts, commands, builds, or other defined processes.',
		keywords: ['execute', 'run', 'build', 'script', 'process', 'commands'],
	},

	// --- Changing or creating code and data artifacts ---
	config_mgmt: {
		description: 'Prompts requesting changes to application configuration, features, user interface design, or documentation, typically involving updates or modifications to existing settings and appearance.',
		keywords: ['configuration', 'feature updates', 'UI modification', 'settings', 'design changes'],
	},
	new_feature: {
		description: 'Prompts requesting the assistant to build a new user-facing feature or capability requiring coordinated code changes, typically spanning multiple files or components.',
		keywords: ['build', 'implement', 'add feature', 'create feature', 'set up', 'integrate', 'new capability'],
	},
	refactor: {
		description: 'Prompts requesting the assistant to restructure, reorganize, or improve existing code without changing its external behavior. Includes extracting functions, renaming, simplifying logic, and improving code organization.',
		keywords: ['refactor', 'restructure', 'reorganize', 'clean up', 'extract', 'simplify', 'rename', 'improve structure'],
	},
	data_analysis_viz: {
		description: 'Prompts requesting the assistant to analyze data, create visualizations, build charts or graphs, run queries, or explore datasets for insights and reporting.',
		keywords: ['data analysis', 'visualization', 'charts', 'graphs', 'querying', 'reporting', 'dashboards', 'data exploration'],
	},

	// --- Catch-all entries ---
	need_info: {
		description: 'Not enough information to determine the intent. The prompt may be too short, too vague, or lack sufficient context to make a determination.',
		keywords: [],
	},
	other: {
		description: 'Prompts whose intent does not fit into any of the defined categories. These may involve niche actions or mixed intents outside the taxonomy.',
		keywords: [],
	},
} as const satisfies Record<string, CategoryDefinition>;
75
76
// ============================================================================
77
// DOMAINS - What area of code/system (orthogonal to intents)
78
// ============================================================================
79
80
/**
 * Domain taxonomy: the technical area a prompt is about.
 *
 * Each key is a domain identifier. Its entry holds the description and the
 * illustrative keywords that are rendered into the domain section of the
 * classification prompt. The `need_info` and `other` entries have no keywords.
 */
export const DOMAIN_DEFINITIONS = {
	// --- Infrastructure, tooling and operations ---
	cicd_cloud_infra: {
		description: 'Prompts involving continuous integration/deployment pipeline configuration, cloud infrastructure provisioning and automation, container orchestration, and infrastructure-as-code workflows.',
		keywords: ['CI/CD', 'build automation', 'deployment pipelines', 'cloud infrastructure', 'provisioning', 'IaC', 'containerization', 'configuration management', 'DevOps'],
	},
	cli_scripting: {
		description: 'Prompts focused on building, customizing, and automating command-line interface tools, shell scripts, and terminal workflows for developer productivity.',
		keywords: ['CLI', 'command-line', 'shell scripting', 'bash', 'PowerShell', 'terminal', 'task automation'],
	},
	automated_testing: {
		description: 'Prompts focused on automated software testing tools, frameworks, and suites spanning unit, integration, and end-to-end testing, including test coverage and workflow analysis.',
		keywords: ['automated testing', 'unit testing', 'integration testing', 'end-to-end testing', 'test frameworks', 'test suites', 'test coverage'],
	},
	ai_agent: {
		description: 'Prompts focused on designing, configuring, and orchestrating AI agents and coding assistants, including their workflows, integration architectures, and framework capabilities.',
		keywords: ['AI agents', 'orchestration', 'workflow automation', 'integration architecture', 'coding assistants', 'LLM integration', 'MCP'],
	},
	network_infra: {
		description: 'Prompts focused on configuring, deploying, and managing network infrastructure, including remote access, multi-server environments, and network security.',
		keywords: ['network configuration', 'server management', 'remote access', 'firewall', 'DNS', 'VPN', 'load balancing', 'routing', 'connectivity'],
	},
	project_mgmt: {
		description: 'Prompts related to project management, issue tracking, and task management within development workflows.',
		keywords: ['issue tracking', 'project management', 'task management', 'workflow management', 'project planning'],
	},

	// --- Application and data development ---
	data_pipelines: {
		description: 'Prompts focused on building, configuring, and orchestrating data processing pipelines that handle ingestion, transformation, and formatting of structured data across various file formats and scales.',
		keywords: ['data pipelines', 'ETL workflows', 'data transformation', 'file processing', 'pipeline orchestration', 'ingestion'],
	},
	web_ui: {
		description: 'Prompts focused on designing, building, and architecting user interface components and layouts for web application frontends.',
		keywords: ['UI', 'web application', 'user interface', 'frontend', 'components', 'layout', 'styling', 'responsive design'],
	},
	backend_dev: {
		description: 'Prompts focused on building, designing, and maintaining server-side applications, APIs, business logic, authentication, and service architectures.',
		keywords: ['API', 'server', 'endpoint', 'REST', 'GraphQL', 'backend', 'microservices', 'authentication', 'business logic'],
	},
	game_dev: {
		description: 'Prompts focused on designing, building, and testing the architecture, mechanics, and subsystems of digital and tabletop games.',
		keywords: ['game development', 'game engine', 'game mechanics', 'rendering', 'multiplayer', 'interactive gameplay', 'asset creation'],
	},
	package_mgmt: {
		description: 'Prompts focused on managing software dependencies, package installations, version control of libraries, and release workflows across programming languages and platforms.',
		keywords: ['dependency management', 'package managers', 'version management', 'software releases', 'dependency resolution'],
	},
	version_control: {
		description: 'Prompts related to managing source code repositories, version control systems, branching and merging strategies, and collaborative development workflows.',
		keywords: ['source code', 'repository', 'version control', 'Git', 'branching', 'merging', 'code management'],
	},

	// --- Incidents, observability, data stores and ML ---
	incident_mgmt: {
		description: 'Prompts focused on building, integrating, and querying incident management systems for tracking, triaging, investigating, and resolving operational and security incidents.',
		keywords: ['incident management', 'security incidents', 'ticketing systems', 'workflow automation', 'incident response', 'triage'],
	},
	logging_observability: {
		description: 'Prompts focused on designing, configuring, querying, and analyzing application and system logs, including logging frameworks, log aggregation, monitoring dashboards, and observability infrastructure.',
		keywords: ['logging', 'log analysis', 'monitoring', 'observability', 'metrics', 'alerting', 'tracing', 'dashboards'],
	},
	database_mgmt: {
		description: 'Prompts focused on designing, analyzing, managing, and querying relational database schemas, including data modeling for business intelligence and data warehouse contexts.',
		keywords: ['database schema', 'relational database', 'data modeling', 'query design', 'schema management', 'SQL'],
	},
	ml_statistics: {
		description: 'Prompts focused on machine learning model development, training, evaluation, and deployment, as well as statistical analysis, data science workflows, and mathematical modeling.',
		keywords: ['machine learning', 'deep learning', 'neural networks', 'model training', 'statistics', 'regression', 'classification', 'data science', 'feature engineering', 'model evaluation'],
	},

	// --- Catch-all entries ---
	need_info: {
		description: 'Not enough information to determine the domain. The prompt may be too short, too vague, or lack sufficient context to make a determination.',
		keywords: [],
	},
	other: {
		description: 'Prompts that do not fit into any of the defined domain categories. These may involve niche or specialized topics outside the taxonomy.',
		keywords: [],
	},
} as const satisfies Record<string, CategoryDefinition>;
154
155
// ============================================================================
156
// SCOPES - What code context is needed
157
// ============================================================================
158
159
/**
 * Scope taxonomy: how much code context a prompt needs.
 *
 * Each key is a scope identifier. Unlike intents and domains, scope entries
 * carry `signals` rather than `keywords`; these are rendered into the scope
 * section of the classification prompt. `unknown_scope` has no signals.
 */
export const SCOPE_DEFINITIONS = {
	// ---- File-level scopes ----
	selection: {
		description: 'Operates on user\'s currently selected/highlighted code',
		signals: ['user has active selection', 'uses "this"'],
	},
	current_file: {
		description: 'Entire file user is currently viewing/editing',
		signals: ['"this file"', 'mentions filename', 'file-level operation'],
	},
	few_files: {
		description: 'Small set of related files (2-5 files)',
		signals: ['"this component and its tests"', 'specific file mentions'],
	},
	many_files: {
		description: 'Large set of files or entire module/package',
		signals: ['"all components"', '"entire module"', '"across files"'],
	},

	// ---- Repository scopes ----
	codebase: {
		description: 'Entire project/codebase understanding required',
		signals: ['"project"', '"codebase"', '"application"', '"system"', 'architecture-level'],
	},
	multi_repository: {
		description: 'Operates across multiple repositories (microservices, monorepo packages)',
		signals: ['"other repo"', '"microservice"', '"shared library"', 'cross-repo dependency', 'multi-package'],
	},

	// ---- External scopes ----
	scm_operations: {
		description: 'Git operations, branch management, PR creation',
		signals: ['git commands', 'branch', 'PR', 'merge', 'rebase', 'git history', 'cherry-pick', 'git push', 'git pull', 'git fetch', 'git commit', 'git diff', 'git stash'],
	},
	issue_tracker: {
		description: 'Operates on issue tracking systems (GitHub Issues, JIRA, Linear)',
		signals: ['issue', 'bug', 'ticket', 'backlog', 'sprint', 'tracking system'],
	},
	remote_service: {
		description: 'Interacts with external services, APIs, cloud resources, or remote databases',
		signals: ['external API', 'cloud service', 'SaaS', 'third-party', 'webhook', 'staging database', 'production database', 'remote connection', 'SSH'],
	},
	external: {
		description: 'Requires knowledge outside the codebase (docs, web, general knowledge)',
		signals: ['questions about languages', 'frameworks', 'best practices', '"how to" (general)'],
	},

	// ---- Transient ----
	ephemeral: {
		description: 'One-off task, doesn\'t directly modify main codebase',
		signals: ['"write a script to"', '"analyze this data"', 'temporary/throwaway work'],
	},
	unknown_scope: {
		description: 'Scope cannot be determined from message',
		signals: [],
	},
} as const satisfies Record<string, CategoryDefinition>;
216
217
// ============================================================================
218
// Shared types and utilities
219
// ============================================================================
220
221
/**
 * Shape shared by every taxonomy entry (intent, domain, or scope).
 *
 * Only `description` is required. The optional members are emitted by
 * `formatCategoryForPrompt` only when they are non-empty, in the order
 * listed here.
 */
interface CategoryDefinition {
	/** Text describing the category; printed directly below the category heading. */
	description: string;
	/** Terms rendered as a comma-separated `- Keywords:` bullet. */
	keywords?: readonly string[];
	/** Cues rendered as a comma-separated `- Signals:` bullet. */
	signals?: readonly string[];
	/** Sample prompts; each is wrapped in double quotes on an `Examples:` line. */
	examples?: readonly string[];
	/** Free-form text appended verbatim as the last line of the entry. */
	notes?: string;
}
228
229
/** Union of every intent identifier, i.e. the keys of `INTENT_DEFINITIONS`. */
export type PromptIntent = keyof typeof INTENT_DEFINITIONS;

/** Union of every domain identifier, i.e. the keys of `DOMAIN_DEFINITIONS`. */
export type PromptDomain = keyof typeof DOMAIN_DEFINITIONS;

/** Union of every scope identifier, i.e. the keys of `SCOPE_DEFINITIONS`. */
export type PromptScope = keyof typeof SCOPE_DEFINITIONS;
233
234
/**
 * Lookup sets used for validation. Each one is built from the keys of the
 * matching definitions object, so it follows the taxonomy automatically.
 */
export const VALID_INTENTS: ReadonlySet<PromptIntent> = new Set(
	Object.keys(INTENT_DEFINITIONS) as PromptIntent[]
);
export const VALID_DOMAINS: ReadonlySet<PromptDomain> = new Set(
	Object.keys(DOMAIN_DEFINITIONS) as PromptDomain[]
);
export const VALID_SCOPES: ReadonlySet<PromptScope> = new Set(
	Object.keys(SCOPE_DEFINITIONS) as PromptScope[]
);
238
239
/**
 * Type guard: reports whether `value` is one of the known intent identifiers.
 *
 * @param value Arbitrary string to check.
 * @returns `true` (narrowing `value` to `PromptIntent`) when it is a member of `VALID_INTENTS`.
 */
export function isValidIntent(value: string): value is PromptIntent {
	// Widen the set's element type so the lookup needs no cast on `value`.
	const knownIntents: ReadonlySet<string> = VALID_INTENTS;
	return knownIntents.has(value);
}
243
/**
 * Type guard: reports whether `value` is one of the known domain identifiers.
 *
 * @param value Arbitrary string to check.
 * @returns `true` (narrowing `value` to `PromptDomain`) when it is a member of `VALID_DOMAINS`.
 */
export function isValidDomain(value: string): value is PromptDomain {
	// Widen the set's element type so the lookup needs no cast on `value`.
	const knownDomains: ReadonlySet<string> = VALID_DOMAINS;
	return knownDomains.has(value);
}
246
/**
 * Type guard: reports whether `value` is one of the known scope identifiers.
 *
 * @param value Arbitrary string to check.
 * @returns `true` (narrowing `value` to `PromptScope`) when it is a member of `VALID_SCOPES`.
 */
export function isValidScope(value: string): value is PromptScope {
	// Widen the set's element type so the lookup needs no cast on `value`.
	const knownScopes: ReadonlySet<string> = VALID_SCOPES;
	return knownScopes.has(value);
}
249
250
/**
 * Result of classifying a single prompt along the taxonomy dimensions
 * (intent, domain, scope) together with a time estimate, a confidence
 * score, and a short justification.
 */
export interface PromptClassification {
	/** Intent identifier; one of the keys of `INTENT_DEFINITIONS`. */
	intent: PromptIntent;

	/** Domain identifier; one of the keys of `DOMAIN_DEFINITIONS`. */
	domain: PromptDomain;

	/** Pair of duration estimates, both expressed as ISO 8601 duration strings. */
	timeEstimate: {
		/** Best-case duration, e.g. "PT5M" for 5 minutes. */
		bestCase: string;
		/** Realistic duration, e.g. "PT15M" for 15 minutes. */
		realistic: string;
	};

	/** Scope identifier; one of the keys of `SCOPE_DEFINITIONS`. */
	scope: PromptScope;

	/** Confidence score between 0.0 and 1.0. */
	confidence: number;

	/** Brief reasoning for the classification. */
	reasoning: string;
}
268
269
// ============================================================================
270
// Prompt generation helpers
271
// ============================================================================
272
273
/**
 * Renders one taxonomy entry as a block of prompt text.
 *
 * The block always starts with a `### \`key\`` heading followed by the
 * description. Keywords, signals, examples and notes follow in that order,
 * each on its own line, and each only when present and non-empty.
 *
 * @param key Category identifier shown in the heading.
 * @param def Category definition to render.
 * @returns The lines of the entry joined with `\n`.
 */
function formatCategoryForPrompt(key: string, def: CategoryDefinition): string {
	const { description, keywords, signals, examples, notes } = def;

	// Each slot is either the rendered line or `undefined` when the field is absent/empty.
	const optionalLines: (string | undefined)[] = [
		keywords?.length ? `- Keywords: ${keywords.join(', ')}` : undefined,
		signals?.length ? `- Signals: ${signals.join(', ')}` : undefined,
		examples?.length ? `Examples: ${examples.map(example => `"${example}"`).join(', ')}` : undefined,
		notes ? notes : undefined,
	];

	const lines: string[] = [`### \`${key}\``, description];
	for (const line of optionalLines) {
		if (line !== undefined) {
			lines.push(line);
		}
	}

	return lines.join('\n');
}
291
292
/**
 * Builds the intent section of the taxonomy prompt.
 *
 * @returns The `## Intent Categories` heading followed by every entry of
 * `INTENT_DEFINITIONS`, formatted and separated by blank lines.
 */
export function generateIntentPromptSection(): string {
	const renderedEntries: string[] = [];
	for (const [intentKey, definition] of Object.entries(INTENT_DEFINITIONS)) {
		renderedEntries.push(formatCategoryForPrompt(intentKey, definition));
	}
	return `## Intent Categories\n${renderedEntries.join('\n\n')}`;
}
300
301
/**
 * Builds the domain section of the taxonomy prompt.
 *
 * @returns The `## Domain Categories` heading followed by every entry of
 * `DOMAIN_DEFINITIONS`, formatted and separated by blank lines.
 */
export function generateDomainPromptSection(): string {
	const renderedEntries: string[] = [];
	for (const [domainKey, definition] of Object.entries(DOMAIN_DEFINITIONS)) {
		renderedEntries.push(formatCategoryForPrompt(domainKey, definition));
	}
	return `## Domain Categories\n${renderedEntries.join('\n\n')}`;
}
309
310
/**
 * Builds the scope section of the taxonomy prompt.
 *
 * @returns The `# SCOPE` heading line followed by every entry of
 * `SCOPE_DEFINITIONS`, formatted and separated by blank lines.
 */
export function generateScopePromptSection(): string {
	const renderedEntries: string[] = [];
	for (const [scopeKey, definition] of Object.entries(SCOPE_DEFINITIONS)) {
		renderedEntries.push(formatCategoryForPrompt(scopeKey, definition));
	}
	return `# SCOPE - What code context is needed (choose ONE)\n${renderedEntries.join('\n\n')}`;
}
318
319
/**
 * Fixed instruction text placed at the top of the taxonomy prompt. It tells
 * the model to treat domain and intent as independent dimensions and lists
 * the pre-classification questions. Kept as one array element per output
 * line; empty strings are the blank lines between paragraphs.
 */
const CLASSIFICATION_GUIDANCE = [
	'# CLASSIFICATION GUIDANCE',
	'',
	'## Domain vs Intent — these are separate dimensions',
	'',
	'Domain and intent are independent. Classify each on its own merits. Do NOT substitute one for the other.',
	'',
	'**Domain** is the technical subject area or problem space the user is operating in.',
	'- It describes a system, architecture, technology area, or problem space — never an activity.',
	'- Think of it as answering: "What area of technology is this about?"',
	'- If the prompt does not clearly indicate a technical domain, use `need_info`.',
	'',
	'**Intent** is the developer action or goal being performed within that domain.',
	'- It describes what the user is trying to accomplish — the verb, not the noun.',
	'- Think of it as answering: "What is the user trying to do?"',
	'- If the prompt does not clearly indicate an intent, use `need_info`.',
	'',
	'**Key rule**: A prompt about CI/CD pipelines (domain) might be asking for an explanation (intent), troubleshooting (intent), or code review (intent). Classify each dimension independently. Never let the domain influence your intent classification or vice versa.',
	'',
	'Focus on semantic meaning, not keyword matching. Keywords are illustrative, not exhaustive.',
	'',
	'## Pre-classification check',
	'1. **What technical area does this fall into?** Match to the most specific domain category.',
	'2. **If multiple domains apply**, choose the primary one — the domain that best captures what the user is actually trying to accomplish.',
	'3. **What is the user trying to do?** Match to the most specific intent category.',
	'4. **If multiple intents apply**, choose the primary one — the intent that best captures the user\'s goal.',
].join('\n');
345
346
/**
 * Assembles the complete taxonomy prompt.
 *
 * The pieces appear in this order: classification guidance, domain section,
 * intent section, time-estimate instructions, scope section. Every piece,
 * including each individual time-estimate line and the trailing empty
 * string, is joined with a blank line (`\n\n`).
 *
 * @returns The full prompt text.
 */
export function generateTaxonomyPrompt(): string {
	const timeEstimateLines = [
		'# TIME ESTIMATE',
		'Estimate how long an **experienced developer familiar with the codebase** would take:',
		'- Consider: understanding requirements, writing code, testing, debugging, code review',
		'- Format: ISO 8601 duration (e.g., "PT5M" for 5 minutes, "PT1H30M" for 1.5 hours)',
		'- Provide both "bestCase" (everything goes smoothly) and "realistic" (typical complications)',
		// Empty element: adds extra vertical space before the scope section.
		'',
	];

	const sections = [
		CLASSIFICATION_GUIDANCE,
		generateDomainPromptSection(),
		generateIntentPromptSection(),
		...timeEstimateLines,
		generateScopePromptSection(),
	];

	return sections.join('\n\n');
}
361
362
// ============================================================================
363
// Tool calling schema for structured output
364
// ============================================================================
365
366
/**
 * Name of the tool used to return a prompt categorization as structured
 * output.
 */
export const CATEGORIZE_PROMPT_TOOL_NAME = 'categorize_prompt';
368
369
/** Sub-schema for the `timeEstimate` object: two required ISO 8601 duration strings. */
const timeEstimateSchema = {
	type: 'object',
	additionalProperties: false,
	properties: {
		bestCase: {
			type: 'string',
			description: 'ISO 8601 duration for best case scenario (e.g., "PT5M" for 5 minutes)',
		},
		realistic: {
			type: 'string',
			description: 'ISO 8601 duration for realistic scenario (e.g., "PT15M" for 15 minutes)',
		},
	},
	required: ['bestCase', 'realistic'],
} as const;

/**
 * JSON Schema for the parameters of the `categorize_prompt` tool.
 *
 * The `enum` lists for `intent`, `domain` and `scope` are taken from the keys
 * of the corresponding definitions objects, so the schema follows the
 * taxonomy. All six properties are required and no additional properties are
 * allowed.
 */
export const CATEGORIZE_PROMPT_TOOL_SCHEMA = {
	type: 'object',
	additionalProperties: false,
	properties: {
		intent: {
			type: 'string',
			enum: Object.keys(INTENT_DEFINITIONS),
			description: 'The primary action the user wants to perform',
		},
		domain: {
			type: 'string',
			enum: Object.keys(DOMAIN_DEFINITIONS),
			description: 'The area of code or system the request relates to',
		},
		scope: {
			type: 'string',
			enum: Object.keys(SCOPE_DEFINITIONS),
			description: 'The code context required to fulfill the request',
		},
		timeEstimate: timeEstimateSchema,
		confidence: {
			type: 'number',
			minimum: 0,
			maximum: 1,
			description: 'Confidence score between 0.0 and 1.0',
		},
		reasoning: {
			type: 'string',
			description: 'Brief 1-2 sentence explanation for the classification',
		},
	},
	required: ['intent', 'domain', 'scope', 'timeEstimate', 'confidence', 'reasoning'],
} as const;
417
418