Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/extension/prompt/common/codeGuesser.ts
13399 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { isBasicASCII } from '../../../util/vs/base/common/strings';
7
8
/**
 * Heuristically decides whether a piece of text reads more like source code
 * than like natural language.
 *
 * The text is split into lines (on `\n` or `\r\n`), each line is classified
 * independently, and the two classifications are tallied against each other.
 * Lines that could not be classified either way do not count.
 *
 * @param text The text to inspect; may span multiple lines.
 * @returns `true` only when strictly more lines were classified as code than
 * as natural language. A tie (including empty input) yields `false`.
 */
export function looksLikeCode(text: string): boolean {
	let codeVotes = 0;
	let proseVotes = 0;

	for (const line of text.split(/\r?\n/)) {
		switch (guessLineType(line)) {
			case GuessedLineType.Code:
				codeVotes++;
				break;
			case GuessedLineType.NaturalLanguage:
				proseVotes++;
				break;
			default:
				// Unclassifiable lines do not vote.
				break;
		}
	}

	return codeVotes > proseVotes;
}
15
16
/**
 * Result of classifying a single line of text.
 */
const enum GuessedLineType {
	/** The line is empty or the hints were evenly balanced. */
	Unknown = 0,
	/** The line looks like source code. */
	Code = 1,
	/** The line looks like natural language. */
	NaturalLanguage = 2
}
21
22
// Character sequences that are a very strong hint that a line is code.
// Kept at module scope so the list is not rebuilt for every line.
const obviousCodeSyntax: readonly string[] = ['==', '!=', '===', '!==', '>=', '<=', '&&', '||', '>>', '>>>', '<<', '<<<', '+=', '-=', '*=', '/=', '%=', '<<=', '<<<=', '>>=', '>>>=', '++', '--', '=>', '->', '...', '??', '??='];

// Single characters that are common in code; each distinct one found in a
// line adds one point to that line's code score.
const commonCodeChars: readonly string[] = [';', '{', '}', '(', ')', '[', ']', '`', '~', '#', '$', '%', '^', '&', '*', '_', '=', '+', '\\', '|', '<', '>'];

/**
 * Classifies a single line as code, natural language, or unknown.
 *
 * Two short-circuit rules are applied first; if neither fires, a handful of
 * weak hints are scored for each side and the higher score wins.
 *
 * @param line One line of text, without its line terminator.
 * @returns `Unknown` for an empty line or when both scores are equal,
 * otherwise whichever of `Code` / `NaturalLanguage` collected more hints.
 */
function guessLineType(line: string): GuessedLineType {
	if (line.length === 0) {
		return GuessedLineType.Unknown;
	}

	// There are some super strong low hanging hints that a line is code
	if (obviousCodeSyntax.some(syntax => line.includes(syntax))) {
		return GuessedLineType.Code;
	}

	// If a line starts with whitespace or syntactical characters, it's probably code.
	// Note: this also matches non-empty lines that consist only of whitespace.
	if (/^[\s;{}()\[\]`~?]/.test(line)) {
		return GuessedLineType.Code;
	}

	// Natural Language Hints
	let naturalLanguageScore = 0;
	// if the first character is an upper-case ASCII letter
	if (/^[A-Z]/.test(line)) {
		naturalLanguageScore += 1;
	}
	// if the line ends with a period
	if (line.endsWith('.')) {
		naturalLanguageScore += 1;
	}
	// if the line is not plain ASCII according to isBasicASCII (defined
	// elsewhere; presumably this covers CJK and any other non-ASCII text)
	if (!isBasicASCII(line)) {
		naturalLanguageScore += 1;
	}

	// Code Hints
	let codeScore = 0;
	// if the first character is ASCII but not upper-case
	const firstChar = line.charAt(0);
	if (isBasicASCII(firstChar) && !/[A-Z]/.test(firstChar)) {
		codeScore += 1;
	}
	// No "starts with whitespace" hint here: such lines already returned
	// Code above, so that check could never contribute to the score.

	// one point per distinct common programming character present in the line
	codeScore += commonCodeChars.filter(char => line.includes(char)).length;

	if (naturalLanguageScore > codeScore) {
		return GuessedLineType.NaturalLanguage;
	}
	if (codeScore > naturalLanguageScore) {
		return GuessedLineType.Code;
	}
	return GuessedLineType.Unknown;
}
80
81