Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/embeddings/common/embeddingsStorage.ts
13401 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Embedding, EmbeddingType, getWellKnownEmbeddingTypeInfo } from './embeddingsComputer';
7
8
/**
 * Serializes an embedding into a compact byte array suitable for storage.
 *
 * Types whose document quantization is `'binary'` are stored as one bit per dimension
 * (bit set when the value is `>= 0`), least-significant bit first within each byte.
 * Every other type is stored as the raw bytes of a `Float32Array`.
 *
 * @param embedding The embedding to serialize.
 * @returns The packed bytes.
 * @throws Error if a binary-quantized embedding's length is not a multiple of 8.
 */
export function packEmbedding(embedding: Embedding): Uint8Array {
	const typeInfo = getWellKnownEmbeddingTypeInfo(embedding.type);

	// All non-binary formats default to float32 for now
	if (typeInfo?.quantization.document !== 'binary') {
		const floats = Float32Array.from(embedding.value);
		return new Uint8Array(floats.buffer, floats.byteOffset, floats.byteLength);
	}

	// Binary quantization: each group of 8 dimensions collapses into a single byte.
	const dimensions = embedding.value.length;
	if (dimensions % 8 !== 0) {
		throw new Error(`Embedding value length must be a multiple of 8 for ${embedding.type.id}, got ${embedding.value.length}`);
	}

	const packed = new Uint8Array(dimensions / 8);
	for (let byteIndex = 0; byteIndex < packed.length; byteIndex++) {
		const base = byteIndex * 8;
		let bits = 0;
		for (let bit = 0; bit < 8; bit++) {
			// Only the sign is kept: non-negative -> 1, negative (or NaN) -> 0.
			if (embedding.value[base + bit] >= 0) {
				bits |= 1 << bit;
			}
		}
		packed[byteIndex] = bits;
	}
	return packed;
}
34
35
/**
 * Unpacks an embedding from a binary value packed with {@link packEmbedding}.
 *
 * For types whose document quantization is `'binary'`, every bit of `data` is expanded into one
 * dimension: a set bit becomes `0.03125` and a clear bit becomes `-0.03125` (least-significant
 * bit first within each byte). All other types are read back as float32 values.
 *
 * @param type The embedding type that `data` was packed as.
 * @param data The packed bytes. May be a view at any byte offset into a larger buffer.
 * @returns The unpacked embedding, tagged with `type`.
 */
export function unpackEmbedding(type: EmbeddingType, data: Uint8Array): Embedding {
	const embeddingMetadata = getWellKnownEmbeddingTypeInfo(type);
	if (embeddingMetadata?.quantization.document === 'binary') {
		// Old metis versions may have stored the values as a float32. Such payloads are detected
		// by their size (>= 1024 bytes) and fall through to the float32 path below.
		if (!(type.equals(EmbeddingType.metis_1024_I16_Binary) && data.length >= 1024)) {
			const values = new Array<number>(data.length * 8);
			for (let i = 0; i < data.length; i++) {
				const byte = data[i];
				for (let j = 0; j < 8; j++) {
					values[i * 8 + j] = (byte & (1 << j)) > 0 ? 0.03125 : -0.03125;
				}
			}
			return { type, value: values };
		}
	}

	// A Float32Array view must start on a 4-byte boundary, otherwise the constructor throws a
	// RangeError. `data` may be a view at an arbitrary offset (e.g. a pooled Node Buffer or a
	// slice of a larger blob), so copy misaligned input into a fresh buffer first.
	// `new Uint8Array(data)` is used rather than `data.slice()` because `Buffer#slice` returns
	// another view over the same memory instead of a copy.
	const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
	const aligned = data.byteOffset % bytesPerFloat === 0 ? data : new Uint8Array(data);

	// Any trailing bytes that do not form a complete float32 are ignored.
	const floatCount = Math.floor(aligned.byteLength / bytesPerFloat);
	const float32Array = new Float32Array(aligned.buffer, aligned.byteOffset, floatCount);
	return { type, value: Array.from(float32Array) };
}
57
58