Path: blob/main/extensions/copilot/src/extension/inlineEdits/vscode-node/naturalLanguageHint.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import * as vscode from 'vscode';


/**
 * Heuristics that classify the tokens of a single document line and decide
 * whether that line is dominated by plain words rather than by keywords and
 * punctuation.
 */
export namespace LineCheck {

	// The React variants use exactly the same keyword lists as their base languages,
	// so each pair shares one set instead of repeating the list.
	const _typeScriptKeywords: ReadonlySet<string> = new Set(['abstract', 'any', 'as', 'asserts', 'async', 'await', 'bigint', 'boolean', 'break', 'case', 'catch', 'class', 'const', 'continue', 'constructor', 'debugger', 'declare', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'from', 'function', 'get', 'if', 'implements', 'import', 'in', 'infer', 'instanceof', 'interface', 'is', 'keyof', 'let', 'module', 'namespace', 'never', 'new', 'null', 'number', 'object', 'of', 'package', 'private', 'protected', 'public', 'readonly', 'require', 'return', 'set', 'static', 'string', 'super', 'switch', 'symbol', 'this', 'throw', 'true', 'try', 'type', 'typeof', 'undefined', 'unique', 'unknown', 'var', 'void', 'while', 'with', 'yield']);
	const _javaScriptKeywords: ReadonlySet<string> = new Set(['async', 'await', 'break', 'case', 'catch', 'class', 'const', 'continue', 'constructor', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'from', 'function', 'get', 'if', 'import', 'in', 'instanceof', 'interface', 'is', 'let', 'new', 'null', 'require', 'return', 'set', 'static', 'string', 'super', 'switch', 'symbol', 'this', 'throw', 'true', 'try', 'type', 'typeof', 'undefined', 'var', 'void', 'while', 'with', 'yield']);

	/**
	 * Keyword sets keyed by language id. The sets are read-only because some of
	 * them are shared between several language ids. Insertion order is significant:
	 * it determines the order of {@link languages}.
	 */
	const _keywordsByLanguage = new Map<string, ReadonlySet<string>>();
	_keywordsByLanguage.set('typescript', _typeScriptKeywords);
	_keywordsByLanguage.set('typescriptreact', _typeScriptKeywords);
	_keywordsByLanguage.set('javascript', _javaScriptKeywords);
	_keywordsByLanguage.set('javascriptreact', _javaScriptKeywords);
	_keywordsByLanguage.set('python', new Set(['False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield']));
	_keywordsByLanguage.set('java', new Set(['abstract', 'assert', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extends', 'final', 'finally', 'float', 'for', 'goto', 'if', 'implements', 'import', 'instanceof', 'int', 'interface', 'long', 'native', 'new', 'null', 'package', 'private', 'protected', 'public', 'return', 'short', 'static', 'strictfp', 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', 'try', 'void', 'volatile', 'while']));
	_keywordsByLanguage.set('go', new Set(['break', 'case', 'chan', 'const', 'continue', 'default', 'defer', 'else', 'fallthrough', 'for', 'func', 'go', 'goto', 'if', 'import', 'interface', 'map', 'package', 'range', 'return', 'select', 'struct', 'switch', 'type', 'var']));
	_keywordsByLanguage.set('csharp', new Set(['abstract', 'as', 'base', 'bool', 'break', 'byte', 'case', 'catch', 'char', 'checked', 'class', 'const', 'continue', 'decimal', 'default', 'delegate', 'do', 'double', 'else', 'enum', 'event', 'explicit', 'extern', 'false', 'finally', 'fixed', 'float', 'for', 'foreach', 'goto', 'if', 'implicit', 'in', 'int', 'interface', 'internal', 'is', 'lock', 'long', 'namespace', 'new', 'null', 'object', 'operator', 'out', 'override', 'params', 'private', 'protected', 'public', 'readonly', 'ref', 'return', 'sbyte', 'sealed', 'short', 'sizeof', 'stackalloc', 'static', 'string', 'struct', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'uint', 'ulong', 'unchecked', 'unsafe', 'ushort', 'using', 'virtual', 'void', 'volatile', 'while']));
	_keywordsByLanguage.set('cpp', new Set(['alignas', 'alignof', 'and', 'and_eq', 'asm', 'atomic_cancel', 'atomic_commit', 'atomic_noexcept', 'auto', 'bitand', 'bitor', 'bool', 'break', 'case', 'catch', 'char', 'char8_t', 'char16_t', 'char32_t', 'class', 'compl', 'concept', 'const', 'consteval', 'constexpr', 'constinit', 'const_cast', 'continue', 'co_await', 'co_return', 'co_yield', 'decltype', 'default', 'delete', 'do', 'double', 'dynamic_cast', 'else', 'enum', 'explicit', 'export', 'extern', 'false', 'float', 'for', 'friend', 'goto', 'if', 'import', 'inline', 'int', 'long', 'module', 'mutable', 'namespace', 'new', 'noexcept', 'not', 'not_eq', 'nullptr', 'operator', 'or', 'or_eq', 'private', 'protected', 'public', 'reflexpr', 'register', 'reinterpret_cast', 'requires', 'return', 'short', 'signed', 'sizeof', 'static', 'static_assert', 'static_cast', 'struct', 'switch', 'synchronized', 'template', 'this', 'thread_local', 'throw', 'true', 'try', 'typedef', 'typeid', 'typename', 'union', 'unsigned', 'using', 'virtual', 'void', 'volatile', 'wchar_t', 'while', 'xor', 'xor_eq']));
	_keywordsByLanguage.set('rust', new Set(['as', 'break', 'const', 'continue', 'crate', 'else', 'enum', 'extern', 'false', 'fn', 'for', 'if', 'impl', 'in', 'let', 'loop', 'match', 'mod', 'move', 'mut', 'pub', 'ref', 'return', 'self', 'Self', 'static', 'struct', 'super', 'trait', 'true', 'type', 'unsafe', 'use', 'where', 'while', 'async', 'await', 'dyn']));
	_keywordsByLanguage.set('ruby', new Set(['BEGIN', 'END', 'alias', 'and', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do', 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if', 'in', 'module', 'next', 'nil', 'not', 'or', 'redo', 'rescue', 'retry', 'return', 'self', 'super', 'then', 'true', 'undef', 'unless', 'until', 'when', 'while', 'yield']));

	// typical keywords of various programming languages
	// NOTE(review): `_classifyLine` looks keywords up by the exact `document.languageId` only,
	// so this set is not consulted there. Presumably the '*' key matters to consumers of
	// `languages` — confirm whether a fallback to this set was intended.
	_keywordsByLanguage.set('*', new Set(['abstract', 'as', 'async', 'await', 'break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'from', 'function', 'get', 'if', 'import', 'in', 'instanceof', 'interface', 'is', 'let', 'new', 'null', 'package', 'private', 'protected', 'public', 'return', 'static', 'super', 'switch', 'this', 'throw', 'true', 'try', 'type', 'typeof', 'var', 'void', 'while', 'with', 'yield']));

	/** Every key of the keyword table, in insertion order (this includes the `'*'` key). */
	export const languages = Array.from(_keywordsByLanguage.keys());

	/** One classified piece of a line. */
	interface IToken {
		/**
		 * - `word`: a word reported by the document that is not in the keyword set
		 * - `keyword`: a word that is in the keyword set of the document's language
		 * - `keyword_start`: the final word of the line when it is a prefix of some keyword
		 * - `space`: text between words that consists of whitespace only
		 * - `other`: any other text between words (may mix whitespace and non-word characters)
		 */
		type: 'word' | 'keyword' | 'keyword_start' | 'space' | 'other';
		value: string;
	}

	// Hoisted so the pattern is not re-created for every gap between words.
	const _whitespaceOnly = /^\s+$/;

	/** Classifies the text found between two words (or after the last word). */
	function _gapToken(value: string): IToken {
		return { type: _whitespaceOnly.test(value) ? 'space' : 'other', value };
	}

	/**
	 * Splits the line at `position` into word and non-word tokens.
	 *
	 * Scanning starts at the first non-whitespace character, so leading indentation
	 * never produces a token. Word boundaries come from
	 * `document.getWordRangeAtPosition`.
	 *
	 * @param document The document that contains the line.
	 * @param position Any position on the line to classify; only its line is used.
	 * @returns The tokens of the line in left-to-right order.
	 */
	function _classifyLine(document: vscode.TextDocument, position: vscode.Position): IToken[] {

		// `undefined` when the language id has no entry; every word is then a plain 'word'.
		const keywords = _keywordsByLanguage.get(document.languageId);

		const result: IToken[] = [];
		const line = document.lineAt(position);
		const lineEnd = line.range.end.character;

		let column = line.firstNonWhitespaceCharacterIndex;
		let lastEnd = column;

		while (column < lineEnd) {
			const wordRange = document.getWordRangeAtPosition(new vscode.Position(position.line, column));

			if (!wordRange) {
				// not inside a word: advance one character and probe again
				column += 1;
				continue;
			}

			const start = wordRange.start.character;
			const end = wordRange.end.character;

			if (start !== lastEnd) {
				// emit whatever sits between the previous word and this one
				result.push(_gapToken(line.text.substring(lastEnd, start)));
			}

			const value = line.text.substring(start, end);
			result.push({
				type: keywords?.has(value) ? 'keyword' : 'word',
				value
			});

			// resume one character past the end of the word
			column = end + 1;
			lastEnd = end;
		}

		if (lastEnd < lineEnd) {
			// trailing text after the last word
			result.push(_gapToken(line.text.substring(lastEnd)));
		}

		const last = result.at(-1);
		if (last?.type === 'word') {
			// check if this is a keyword prefix
			for (const keyword of keywords ?? []) {
				if (keyword.startsWith(last.value)) {
					last.type = 'keyword_start';
					break;
				}
			}
		}

		return result;
	}

	/**
	 * Heuristically decides whether the line at `position` is dominated by plain
	 * words rather than by keywords and punctuation.
	 *
	 * The line is rejected (returns `false`) when any of the following holds:
	 * - it has fewer than 4 tokens or fewer than 2 whitespace-only gaps,
	 * - it contains neither a keyword nor an 'other' token,
	 * - keywords plus keyword prefixes are at least as numerous as plain words,
	 * - 'other' tokens are at least as numerous as whitespace-only gaps.
	 *
	 * @param document The document that contains the line.
	 * @param position Any position on the line to inspect.
	 * @returns `true` only when none of the rejection rules apply.
	 */
	export function isNaturalLanguageDominated(document: vscode.TextDocument, position: vscode.Position): boolean {

		// LOGIC: tokenize the line into words (as defined by the language), whitespace, and other
		// characters (which can be a mix of whitespace and non-word characters).

		const tokens = _classifyLine(document, position);

		let wordCount = 0;
		let keywordCount = 0;
		let keywordStartCount = 0;
		let spaceCount = 0;
		let otherCount = 0;

		for (const token of tokens) {
			switch (token.type) {
				case 'keyword':
					keywordCount += 1;
					break;
				case 'keyword_start':
					keywordStartCount += 1;
					break;
				case 'word':
					wordCount += 1;
					break;
				case 'space':
					spaceCount += 1;
					break;
				case 'other':
					otherCount += 1;
					break;
			}
		}

		if (tokens.length < 4 || spaceCount < 2) {
			// too little content
			return false;
		}

		if (keywordCount === 0 && otherCount === 0) {
			return false;
		}

		if ((keywordCount + keywordStartCount) >= wordCount) {
			return false; // too many keywords
		}

		if (otherCount >= spaceCount) {
			return false; // too much punctuation
		}

		return true;
	}
}