Path: blob/main/test/benchmark/benchmark_utf8.c
4130 views
// Copyright 2016 The Emscripten Authors. All rights reserved.1// Emscripten is available under two separate licenses, the MIT license and the2// University of Illinois/NCSA Open Source License. Both these licenses can be3// found in the LICENSE file.45#include <stdio.h>6#include <string.h>7#include <wchar.h>8#include <assert.h>9#include <emscripten.h>10#include <time.h>1112EM_JS_DEPS(deps, "$UTF8ToString,emscripten_get_now");1314double test(const char *str) {15double res = EM_ASM_DOUBLE({16var t0 = _emscripten_get_now();17var str = UTF8ToString($0);18var t1 = _emscripten_get_now();19// out('t: ' + (t1 - t0) + ', len(result): ' + str.length + ', result: ' + str.slice(0, 100));20return (t1-t0);21}, str);22return res;23}2425char *utf8_corpus = 0;26long utf8_corpus_length = 0;2728char *randomString(int len) {29if (!utf8_corpus) {30FILE *handle = fopen("utf8_corpus.txt", "rb");31fseek(handle, 0, SEEK_END);32utf8_corpus_length = ftell(handle);33assert(utf8_corpus_length > 0);34utf8_corpus = malloc(utf8_corpus_length+1);35fseek(handle, 0, SEEK_SET);36fread(utf8_corpus, 1, utf8_corpus_length, handle);37fclose(handle);38utf8_corpus[utf8_corpus_length] = '\0';39}40int startIdx = rand() % (utf8_corpus_length - len);41while (((unsigned char)utf8_corpus[startIdx] & 0xC0) == 0x80) {42++startIdx;43if (startIdx + len > utf8_corpus_length) len = utf8_corpus_length - startIdx;44}45assert(len > 0);46char *s = malloc(len+1);47memcpy(s, utf8_corpus + startIdx, len);48s[len] = '\0';49while (len > 0 && ((unsigned char)s[len-1] & 0xC0) == 0x80) { s[--len] = '\0'; }50while (len > 0 && ((unsigned char)s[len-1] & 0xC0) == 0xC0) { s[--len] = '\0'; }51assert(len >= 0);52return s;53}5455int main() {56double t = 0;57double t2 = emscripten_get_now();58for (int i = 0; i < 100000; ++i) {59// Create strings of lengths 1-32, because the internals of text decoding60// have a cutoff of 16 for when to use TextDecoder, and we wish to test both61// (see UTF8ArrayToString).62char *str = randomString((i % 32) + 1);63t += test(str);64free(str);65}66double t3 = emscripten_get_now();67printf("OK. Time: %f (%f).\n", t, t3-t2);68return 0;69}707172