Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/test/benchmark/benchmark_utf16.cpp
4133 views
1
// Copyright 2016 The Emscripten Authors. All rights reserved.
2
// Emscripten is available under two separate licenses, the MIT license and the
3
// University of Illinois/NCSA Open Source License. Both these licenses can be
4
// found in the LICENSE file.
5
6
#include <stdio.h>
7
#include <string.h>
8
#include <wchar.h>
9
#include <iostream>
10
#include <cassert>
11
#include <emscripten.h>
12
13
EM_JS_DEPS(deps, "$UTF16ToString");
14
15
double test(const unsigned short *str) {
16
double res = EM_ASM_DOUBLE({
17
var t0 = _emscripten_get_now();
18
var str = UTF16ToString($0);
19
var t1 = _emscripten_get_now();
20
out('t: ' + (t1 - t0) + ', len(result): ' + str.length + ', result: ' + str.slice(0, 100));
21
return (t1-t0);
22
}, str);
23
return res;
24
}
25
26
// Corpus cache shared by all randomString() calls; empty until first use.
// Buffer holding the corpus as 16-bit code units.
unsigned short *utf16_corpus = NULL;
// Number of 16-bit code units stored in utf16_corpus.
long utf16_corpus_length = 0L;
28
29
unsigned short *randomString(int len) {
30
if (!utf16_corpus) {
31
// FILE *handle = fopen("ascii_corpus.txt", "rb");
32
FILE *handle = fopen("utf16_corpus.txt", "rb");
33
fseek(handle, 0, SEEK_END);
34
utf16_corpus_length = ftell(handle)/2;
35
assert(utf16_corpus_length > 0);
36
utf16_corpus = new unsigned short[utf16_corpus_length+1];
37
fseek(handle, 0, SEEK_SET);
38
fread(utf16_corpus, 2, utf16_corpus_length, handle);
39
fclose(handle);
40
utf16_corpus[utf16_corpus_length] = 0;
41
}
42
int startIdx = rand() % (utf16_corpus_length - len);
43
while((utf16_corpus[startIdx] & 0xFF00) == 0xDC00) {
44
++startIdx;
45
if (startIdx + len > utf16_corpus_length) len = utf16_corpus_length - startIdx;
46
}
47
assert(len > 0);
48
unsigned short *s = new unsigned short[len+1];
49
memcpy(s, utf16_corpus + startIdx, len*2);
50
s[len] = 0;
51
while(((unsigned short)s[len-1] & 0xFF00) == 0xD800) { s[--len] = 0; }
52
assert(len >= 0);
53
return s;
54
}
55
56
int main() {
57
double t = 0;
58
double t2 = emscripten_get_now();
59
for(int i = 0; i < 10; ++i) {
60
// FF Nightly: Already on small strings of 64 bytes in length, TextDecoder trumps in performance.
61
unsigned short *str = randomString(100);
62
t += test(str);
63
delete [] str;
64
}
65
double t3 = emscripten_get_now();
66
printf("OK. Time: %f (%f).\n", t, t3-t2);
67
return 0;
68
}
69
70