Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/platform/embeddings/test/node/packEmbedding.spec.ts
13405 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import assert from 'assert';
7
import { suite, test } from 'vitest';
8
import { Embedding, EmbeddingType } from '../../../embeddings/common/embeddingsComputer';
9
import { packEmbedding, unpackEmbedding } from '../../common/embeddingsStorage';
10
11
suite('Pack Embedding', () => {
	/** Number of components in the Metis vectors used by these tests. */
	const metisDimensions = 1024;

	/**
	 * Builds a Metis embedding in which every component is randomly chosen
	 * to be either 0.03125 or -0.03125.
	 */
	function randomMetisEmbedding(): Embedding {
		const value: number[] = [];
		for (let i = 0; i < metisDimensions; i++) {
			value.push(Math.random() < 0.5 ? 0.03125 : -0.03125);
		}
		return { type: EmbeddingType.metis_1024_I16_Binary, value };
	}

	/**
	 * Asserts that two embeddings carry the same values. The length is compared
	 * first so that a size mismatch is reported before the element-wise comparison.
	 */
	function assertSameValues(
		actual: { readonly value: ArrayLike<number> },
		expected: { readonly value: ArrayLike<number> },
	): void {
		assert.deepStrictEqual(actual.value.length, expected.value.length);
		assert.deepStrictEqual(actual.value, expected.value);
	}

	test('Text3small should pack and unpack to same values', () => {
		// Generate the values as float32 up front. That way the comparison below is not
		// affected by the tiny rounding that occurs when a js number is narrowed to float32.
		const float32Values = new Float32Array(512);
		for (let i = 0; i < float32Values.length; i++) {
			float32Values[i] = Math.random();
		}
		const original: Embedding = {
			type: EmbeddingType.text3small_512,
			value: Array.from(float32Values),
		};

		const packed = packEmbedding(original);
		const roundTripped = unpackEmbedding(EmbeddingType.text3small_512, packed);

		assertSameValues(roundTripped, original);
	});

	test('Metis should use binary storage', () => {
		const original = randomMetisEmbedding();

		// The packed form is expected to take one bit per component.
		const packed = packEmbedding(original);
		assert.strictEqual(packed.length, metisDimensions / 8);

		const roundTripped = unpackEmbedding(EmbeddingType.metis_1024_I16_Binary, packed);
		assertSameValues(roundTripped, original);
	});

	test('Unpack should work with buffer offsets', () => {
		const original = randomMetisEmbedding();
		const packed = packEmbedding(original);

		// Place the packed bytes in the middle of a larger buffer, leaving the same
		// amount of padding before and after them.
		const paddingSize = 512;
		const padded = new Uint8Array(packed.length + paddingSize * 2);
		padded.set(packed, paddingSize);

		// View only the packed region, so the view has a non-zero byteOffset
		// into the underlying ArrayBuffer.
		const offsetView = new Uint8Array(padded.buffer, paddingSize, packed.length);

		const roundTripped = unpackEmbedding(EmbeddingType.metis_1024_I16_Binary, offsetView);
		assertSameValues(roundTripped, original);
	});

	test('Unpack should work with old style metis data', () => {
		const original = randomMetisEmbedding();

		// Deliberately bypass packEmbedding: hand unpackEmbedding the raw bytes of a
		// Float32Array holding the values instead.
		const rawFloats = Float32Array.from(original.value);
		const rawBytes = new Uint8Array(rawFloats.buffer, rawFloats.byteOffset, rawFloats.byteLength);

		const roundTripped = unpackEmbedding(EmbeddingType.metis_1024_I16_Binary, rawBytes);
		assertSameValues(roundTripped, original);
	});
});
77
78