Path: blob/main/extensions/copilot/src/extension/prompt/common/streamingGrammar.ts
13399 views
/*---------------------------------------------------------------------------------------------1* Copyright (c) Microsoft Corporation. All rights reserved.2* Licensed under the MIT License. See License.txt in the project root for license information.3*--------------------------------------------------------------------------------------------*/45import { Iterable } from '../../../util/vs/base/common/iterator';67export interface IToken<S> {8state: S;9transitionTo?: S;10token: string;11}1213/**14* Defines a grammar that moves the streaming response between states (S).15* You give it an initial state and then a map of states the grammar is in, to16* substrings that it looks for to move into a different state.17*/18export class StreamingGrammar<S extends string | number> {19public state: S;20private accumulator = '';2122private currentEntries: [string, S][] = [];2324/**25* A list of tokens seen so far. "tokens" are substrings of the `deltaText`.26* 'transitionTo' is set for tokens seen in the grammar when a state transiton happens.27*/28public readonly tokens: IToken<S>[] = [];2930constructor(31initialState: S,32private readonly grammar: { [K in S]?: { [token: string]: S } },33) {34this.state = initialState;35this.currentEntries = Object.entries(grammar[initialState] || {});36}3738/**39* Gets whether the given state was visited.40*/41public visited(state: S) {42return this.tokens.some(token => token.state === state);43}4445/**46* Convenience function that accumulates the string of tokens between47* the given indices, optionally only for tokens in the given state.48*/49public accumulate(fromIndex = 0, toIndex = this.tokens.length, whenState?: S) {50let str = '';51for (let i = fromIndex; i < toIndex && i < this.tokens.length; i++) {52const token = this.tokens[i];53if (whenState !== undefined) {54if (token.state === whenState && !token.transitionTo) {55str += token.token;56}57} else {58str += token.token;59}60}6162return str;63}6465/** Appends text, returns the tokens that were added as a convenience. */66public append(deltaText: string): Iterable<IToken<S>> {67let maxLength = 0;68let found: { index: number; length: number; toState: S } | undefined;69const startIndex = this.tokens.length;7071this.accumulator += deltaText;72for (const [key, toState] of this.currentEntries) {73const index = this.accumulator.indexOf(key);74if (index !== -1 && (!found || index < found.index)) {75found = { index, toState, length: key.length };76}7778maxLength = Math.max(maxLength, key.length);79}8081if (found) {82if (found.index > 0) {83this.tokens.push({ state: this.state, token: this.accumulator.slice(0, found.index) });84}85this.tokens.push({ state: this.state, token: this.accumulator.slice(found.index, found.index + found.length), transitionTo: found.toState });8687const remainder = this.accumulator.slice(found.index + found.length);88this.state = found.toState;89this.currentEntries = Object.entries(this.grammar[found.toState] || {});90this.accumulator = '';91this.append(remainder);92} else if (this.accumulator.length > maxLength) {93// todo: we could use a boyer-moore-horspool lookup table to reduce94// the amoung of accumulated text we need to keep95this.tokens.push({ state: this.state, token: this.accumulator.slice(0, this.accumulator.length - maxLength) });96this.accumulator = this.accumulator.slice(this.accumulator.length - maxLength);97}9899return Iterable.slice(this.tokens, startIndex);100}101102public flush(): Iterable<IToken<S>> {103if (this.accumulator) {104this.tokens.push({ state: this.state, token: this.accumulator });105return Iterable.slice(this.tokens, -1);106}107108return Iterable.empty();109}110}111112113