Path: blob/main/contrib/arm-optimized-routines/string/bench/strlen.c
39534 views
/*1* strlen benchmark.2*3* Copyright (c) 2020-2021, Arm Limited.4* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception5*/67#define _GNU_SOURCE8#include <stdint.h>9#include <stdio.h>10#include <string.h>11#include <assert.h>12#include "stringlib.h"13#include "benchlib.h"1415#define ITERS 500016#define ITERS2 4000000017#define ITERS3 400000018#define NUM_TESTS 655361920#define MAX_ALIGN 3221#define MAX_STRLEN 1282223static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096)));2425#define DOTEST(STR,TESTFN) \26printf (STR); \27RUN (TESTFN, strlen); \28RUNA64 (TESTFN, __strlen_aarch64); \29RUNA64 (TESTFN, __strlen_aarch64_mte); \30RUNSVE (TESTFN, __strlen_aarch64_sve); \31RUNT32 (TESTFN, __strlen_armv6t2); \32printf ("\n");3334static uint16_t strlen_tests[NUM_TESTS];3536typedef struct { uint16_t size; uint16_t freq; } freq_data_t;37typedef struct { uint8_t align; uint16_t freq; } align_data_t;3839#define SIZE_NUM 6553640#define SIZE_MASK (SIZE_NUM - 1)41static uint8_t strlen_len_arr[SIZE_NUM];4243/* Frequency data for strlen sizes up to 128 based on SPEC2017. */44static freq_data_t strlen_len_freq[] =45{46{ 12,22671}, { 18,12834}, { 13, 9555}, { 6, 6348}, { 17, 6095}, { 11, 2115},47{ 10, 1335}, { 7, 814}, { 2, 646}, { 9, 483}, { 8, 471}, { 16, 418},48{ 4, 390}, { 1, 388}, { 5, 233}, { 3, 204}, { 0, 79}, { 14, 79},49{ 15, 69}, { 26, 36}, { 22, 35}, { 31, 24}, { 32, 24}, { 19, 21},50{ 25, 17}, { 28, 15}, { 21, 14}, { 33, 14}, { 20, 13}, { 24, 9},51{ 29, 9}, { 30, 9}, { 23, 7}, { 34, 7}, { 27, 6}, { 44, 5},52{ 42, 4}, { 45, 3}, { 47, 3}, { 40, 2}, { 41, 2}, { 43, 2},53{ 58, 2}, { 78, 2}, { 36, 2}, { 48, 1}, { 52, 1}, { 60, 1},54{ 64, 1}, { 56, 1}, { 76, 1}, { 68, 1}, { 80, 1}, { 84, 1},55{ 72, 1}, { 86, 1}, { 35, 1}, { 39, 1}, { 50, 1}, { 38, 1},56{ 37, 1}, { 46, 1}, { 98, 1}, {102, 1}, {128, 1}, { 51, 1},57{107, 1}, { 0, 0}58};5960#define ALIGN_NUM 102461#define ALIGN_MASK (ALIGN_NUM - 1)62static uint8_t strlen_align_arr[ALIGN_NUM];6364/* Alignment data for strlen based on SPEC2017. */65static align_data_t string_align_freq[] =66{67{8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0}68};6970static void71init_strlen_distribution (void)72{73int i, j, freq, size, n;7475for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++)76for (j = 0, size = strlen_len_freq[i].size; j < freq; j++)77strlen_len_arr[n++] = size;78assert (n == SIZE_NUM);7980for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++)81for (j = 0, size = string_align_freq[i].align; j < freq; j++)82strlen_align_arr[n++] = size;83assert (n == ALIGN_NUM);84}8586static void87init_strlen_tests (void)88{89uint16_t index[MAX_ALIGN];9091memset (a, 'x', sizeof (a));9293/* Create indices for strings at all alignments. */94for (int i = 0; i < MAX_ALIGN; i++)95{96index[i] = i * (MAX_STRLEN + 1);97a[index[i] + MAX_STRLEN] = 0;98}99100/* Create a random set of strlen input strings using the string length101and alignment distributions. */102for (int n = 0; n < NUM_TESTS; n++)103{104int align = strlen_align_arr[rand32 (0) & ALIGN_MASK];105int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK];106107strlen_tests[n] =108index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;109assert ((strlen_tests[n] & (align - 1)) == 0);110assert (strlen (a + strlen_tests[n]) == exp_len);111}112}113114static volatile size_t maskv = 0;115116static void inline __attribute ((always_inline))117strlen_random (const char *name, size_t (*fn)(const char *))118{119size_t res = 0, mask = maskv;120uint64_t strlen_size = 0;121printf ("%22s ", name);122123for (int c = 0; c < NUM_TESTS; c++)124strlen_size += fn (a + strlen_tests[c]) + 1;125strlen_size *= ITERS;126127/* Measure throughput of strlen. */128uint64_t t = clock_get_ns ();129for (int i = 0; i < ITERS; i++)130for (int c = 0; c < NUM_TESTS; c++)131res += fn (a + strlen_tests[c]);132t = clock_get_ns () - t;133printf ("tp: %.3f ", (double)strlen_size / t);134135/* Measure latency of strlen result with (res & mask). */136t = clock_get_ns ();137for (int i = 0; i < ITERS; i++)138for (int c = 0; c < NUM_TESTS; c++)139res += fn (a + strlen_tests[c] + (res & mask));140t = clock_get_ns () - t;141printf ("lat: %.3f\n", (double)strlen_size / t);142maskv = res & mask;143}144145static void inline __attribute ((always_inline))146strlen_small_aligned (const char *name, size_t (*fn)(const char *))147{148printf ("%22s ", name);149150size_t res = 0, mask = maskv;151for (int size = 1; size <= 64; size *= 2)152{153memset (a, 'x', size);154a[size - 1] = 0;155156uint64_t t = clock_get_ns ();157for (int i = 0; i < ITERS2; i++)158res += fn (a + (i & mask));159t = clock_get_ns () - t;160printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,161size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);162}163maskv &= res;164printf ("\n");165}166167static void inline __attribute ((always_inline))168strlen_small_unaligned (const char *name, size_t (*fn)(const char *))169{170printf ("%22s ", name);171172size_t res = 0, mask = maskv;173int align = 9;174for (int size = 1; size <= 64; size *= 2)175{176memset (a + align, 'x', size);177a[align + size - 1] = 0;178179uint64_t t = clock_get_ns ();180for (int i = 0; i < ITERS2; i++)181res += fn (a + align + (i & mask));182t = clock_get_ns () - t;183printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,184size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);185}186maskv &= res;187printf ("\n");188}189190static void inline __attribute ((always_inline))191strlen_medium (const char *name, size_t (*fn)(const char *))192{193printf ("%22s ", name);194195size_t res = 0, mask = maskv;196for (int size = 128; size <= 4096; size *= 2)197{198memset (a, 'x', size);199a[size - 1] = 0;200201uint64_t t = clock_get_ns ();202for (int i = 0; i < ITERS3; i++)203res += fn (a + (i & mask));204t = clock_get_ns () - t;205printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,206size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);207}208maskv &= res;209printf ("\n");210}211212int main (void)213{214rand32 (0x12345678);215init_strlen_distribution ();216init_strlen_tests ();217218DOTEST ("Random strlen (bytes/ns):\n", strlen_random);219DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned);220DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned);221DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium);222223return 0;224}225226227