Path: blob/main/extensions/copilot/src/extension/prompt/common/codeGuesser.ts
13399 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { isBasicASCII } from '../../../util/vs/base/common/strings';

/**
 * Character sequences that are treated as conclusive evidence that a line is code.
 * A line containing any one of them is classified as code without further scoring.
 */
const OBVIOUS_CODE_SYNTAX: readonly string[] = ['==', '!=', '===', '!==', '>=', '<=', '&&', '||', '>>', '>>>', '<<', '<<<', '+=', '-=', '*=', '/=', '%=', '<<=', '<<<=', '>>=', '>>>=', '++', '--', '=>', '->', '...', '??', '??='];

/**
 * Single characters that commonly appear in source code. Each distinct character
 * found in a line adds one point to that line's code score.
 */
const COMMON_CODE_CHARS: readonly string[] = [';', '{', '}', '(', ')', '[', ']', '`', '~', '#', '$', '%', '^', '&', '*', '_', '=', '+', '\\', '|', '<', '>'];

/** Matches a line whose first character is whitespace (i.e. an indented line). */
const STARTS_WITH_WHITESPACE = /^\s/;

/** Matches a line whose first character is one of a few syntactical characters. */
const STARTS_WITH_SYNTAX_CHAR = /^[;{}()\[\]`~?]/;

/** Matches an upper-case ASCII letter. */
const UPPER_CASE_LETTER = /[A-Z]/;

/**
 * Heuristically decides whether a piece of text is code rather than natural language.
 *
 * The text is split into lines (on `\n` or `\r\n`), each line is classified
 * independently, and the lines classified as code are weighed against the lines
 * classified as natural language. Lines that could not be classified count for neither.
 *
 * @param text The text to inspect.
 * @returns `true` only when strictly more lines look like code than look like natural
 * language; ties (including empty text) yield `false`.
 */
export function looksLikeCode(text: string): boolean {
	let codeLineCount = 0;
	let naturalLanguageLineCount = 0;

	for (const line of text.split(/\r?\n/)) {
		const lineType = guessLineType(line);
		if (lineType === GuessedLineType.Code) {
			codeLineCount++;
		} else if (lineType === GuessedLineType.NaturalLanguage) {
			naturalLanguageLineCount++;
		}
	}

	return codeLineCount > naturalLanguageLineCount;
}

/** The outcome of classifying a single line of text. */
const enum GuessedLineType {
	/** The line is empty, or the code and natural-language hints are tied. */
	Unknown,
	/** The line looks like source code. */
	Code,
	/** The line looks like natural language. */
	NaturalLanguage
}

/**
 * Classifies a single line as code, natural language, or unknown.
 *
 * Strong hints (an obvious operator anywhere in the line, or a leading whitespace or
 * syntactical character) classify the line as code immediately. Otherwise a
 * natural-language score and a code score are accumulated and the higher one wins.
 *
 * @param line One line of text, without its line terminator.
 * @returns The guessed type; `Unknown` for an empty line or when the scores are equal.
 */
function guessLineType(line: string): GuessedLineType {
	if (line.length === 0) {
		return GuessedLineType.Unknown;
	}

	// There are some super strong low hanging hints that a line is code
	if (OBVIOUS_CODE_SYNTAX.some(syntax => line.includes(syntax))) {
		return GuessedLineType.Code;
	}

	// If a line starts with whitespace or syntactical characters, it's probably code
	if (STARTS_WITH_WHITESPACE.test(line) || STARTS_WITH_SYNTAX_CHAR.test(line)) {
		return GuessedLineType.Code;
	}

	const firstChar = line.charAt(0);
	const startsWithUpperCase = UPPER_CASE_LETTER.test(firstChar);

	// Natural Language Hints
	let naturalLanguageScore = 0;
	// if the first character is upper-case
	if (startsWithUpperCase) {
		naturalLanguageScore += 1;
	}
	// if the line ends with a period
	if (line.endsWith('.')) {
		naturalLanguageScore += 1;
	}
	// if the line contains anything `isBasicASCII` rejects (presumably any
	// non-ASCII text, e.g. CJK characters -- the helper is defined elsewhere)
	if (!isBasicASCII(line)) {
		naturalLanguageScore += 1;
	}

	// Code Hints
	let codeScore = 0;
	// if the first character is ASCII but not upper-case
	if (isBasicASCII(firstChar) && !startsWithUpperCase) {
		codeScore += 1;
	}
	// No score is needed for leading tabs or spaces: such lines have already been
	// classified as code by the early return above.
	// one point for every distinct common programming character the line contains
	codeScore += COMMON_CODE_CHARS.filter(char => line.includes(char)).length;

	if (naturalLanguageScore > codeScore) {
		return GuessedLineType.NaturalLanguage;
	}
	if (codeScore > naturalLanguageScore) {
		return GuessedLineType.Code;
	}
	return GuessedLineType.Unknown;
}