Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/test/benchmark/benchmark_utf8.c
4130 views
1
// Copyright 2016 The Emscripten Authors. All rights reserved.
2
// Emscripten is available under two separate licenses, the MIT license and the
3
// University of Illinois/NCSA Open Source License. Both these licenses can be
4
// found in the LICENSE file.
5
6
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>

#include <emscripten.h>
12
13
EM_JS_DEPS(deps, "$UTF8ToString,emscripten_get_now");
14
15
double test(const char *str) {
16
double res = EM_ASM_DOUBLE({
17
var t0 = _emscripten_get_now();
18
var str = UTF8ToString($0);
19
var t1 = _emscripten_get_now();
20
// out('t: ' + (t1 - t0) + ', len(result): ' + str.length + ', result: ' + str.slice(0, 100));
21
return (t1-t0);
22
}, str);
23
return res;
24
}
25
26
// Contents of utf8_corpus.txt (NUL-terminated) and its size in bytes, not
// counting the terminator. Both are filled in by the first randomString()
// call and reused by every later call.
char *utf8_corpus = NULL;
long utf8_corpus_length = 0;

// Reads utf8_corpus.txt into utf8_corpus / utf8_corpus_length.
// The benchmark cannot do anything useful without its corpus, so every
// failure (missing file, empty file, short read, out of memory) is reported
// on stderr and terminates the process.
static void loadCorpus(void) {
  FILE *handle = fopen("utf8_corpus.txt", "rb");
  if (!handle) {
    perror("utf8_corpus.txt");
    exit(EXIT_FAILURE);
  }

  long size = -1;
  if (fseek(handle, 0, SEEK_END) == 0) {
    size = ftell(handle);
  }
  if (size <= 0 || fseek(handle, 0, SEEK_SET) != 0) {
    fprintf(stderr, "utf8_corpus.txt: empty or not seekable\n");
    fclose(handle);
    exit(EXIT_FAILURE);
  }

  char *data = malloc((size_t)size + 1);
  if (!data) {
    fprintf(stderr, "utf8_corpus.txt: out of memory reading %ld bytes\n", size);
    fclose(handle);
    exit(EXIT_FAILURE);
  }

  size_t got = fread(data, 1, (size_t)size, handle);
  fclose(handle);
  if (got != (size_t)size) {
    fprintf(stderr, "utf8_corpus.txt: short read (%zu of %ld bytes)\n", got, size);
    free(data);
    exit(EXIT_FAILURE);
  }

  data[size] = '\0';
  utf8_corpus = data;
  utf8_corpus_length = size;
}

// Returns a freshly malloc'ed, NUL-terminated slice of the corpus that is at
// most `len` bytes long and never starts or ends in the middle of a UTF-8
// sequence. The caller owns the result and must free() it.
//
// The slice can be shorter than `len` (possibly empty):
//  - `len` is clamped to the corpus size, and a non-positive `len` yields "";
//  - if the random start offset lands on continuation bytes it is advanced to
//    the next sequence start, shrinking the slice if it would run off the end;
//  - trailing continuation bytes and then trailing lead bytes are stripped,
//    which also drops a complete multi-byte character at the very end.
char *randomString(int len) {
  if (!utf8_corpus) {
    loadCorpus();
  }

  long want = len;
  if (want > utf8_corpus_length) {
    want = utf8_corpus_length;
  }
  if (want < 0) {
    want = 0;
  }

  // A modulus of zero is undefined, so a request covering the whole corpus
  // simply starts at offset 0.
  long range = utf8_corpus_length - want;
  long start = range > 0 ? rand() % range : 0;

  // Advance past continuation bytes (10xxxxxx). The terminator at
  // utf8_corpus[utf8_corpus_length] stops the scan at the end of the corpus.
  while (((unsigned char)utf8_corpus[start] & 0xC0) == 0x80) {
    ++start;
  }
  if (start + want > utf8_corpus_length) {
    want = utf8_corpus_length - start;
  }

  char *s = malloc((size_t)want + 1);
  if (!s) {
    fprintf(stderr, "randomString: out of memory\n");
    exit(EXIT_FAILURE);
  }
  memcpy(s, utf8_corpus + start, (size_t)want);
  s[want] = '\0';

  // Strip trailing continuation bytes, then trailing lead bytes (11xxxxxx),
  // so the copy cannot end in a truncated sequence.
  while (want > 0 && ((unsigned char)s[want - 1] & 0xC0) == 0x80) {
    s[--want] = '\0';
  }
  while (want > 0 && ((unsigned char)s[want - 1] & 0xC0) == 0xC0) {
    s[--want] = '\0';
  }
  return s;
}
55
56
// Benchmark driver: decodes 100000 random corpus slices and prints the summed
// per-decode time from test() followed by the wall time of the whole loop
// (which also includes generating and freeing the strings).
int main() {
  double decodeTime = 0;
  double wallStart = emscripten_get_now();

  int i = 0;
  while (i < 100000) {
    // Request lengths cycle through 1-32: the text decoding internals switch
    // to TextDecoder at a cutoff of 16, and both paths should be exercised
    // (see UTF8ArrayToString).
    char *sample = randomString((i % 32) + 1);
    decodeTime += test(sample);
    free(sample);  // randomString() hands ownership to the caller
    ++i;
  }

  double wallEnd = emscripten_get_now();
  printf("OK. Time: %f (%f).\n", decodeTime, wallEnd - wallStart);
  return 0;
}
71
72