/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <[email protected]>
 */

#include <machine/asm.h>

/*
 * size_t strlen(const char *s)
 *
 * Word-at-a-time strlen for RV64 (uses ld, 64-bit constants and mul).
 *
 * Zero-byte detection follows
 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 *	haszero(v) = ((v) - 0x01010101UL) & ~(v) & 0x80808080UL
 * widened here to 64 bits (0x0101010101010101 / 0x8080808080808080).
 * The result is non-zero iff v contains a zero byte.  Bits above the first
 * zero byte may be false positives (borrow propagation), but the lowest set
 * bit always marks the first zero byte, and only that bit is used below.
 *
 * In:     a0 - char *s
 * Out:    a0 - number of bytes before the terminating NUL
 * Clobb:  a1, a2, a3, t0, t1, t2, t3
 *
 * Notes:
 *  - Every load is an 8-byte load from an 8-byte aligned address, so the
 *    function may read up to 7 bytes before s and up to 7 bytes past the
 *    terminator, but only inside aligned doublewords that contain at least
 *    one byte of the string.
 *  - The byte <-> bit-position mapping assumes little-endian byte order
 *    (lowest address == least significant byte).
 */
ENTRY(strlen)
	/*
	 * register a0 - char *str_start
	 * register a1 - char *str_ptr   (always 8-byte aligned)
	 * register a2 - char[8] iter    (current doubleword / haszero result)
	 */

	/* load constants for haszero */
	li	t0, 0x0101010101010101
	slli	t1, t0, 7		# 0x8080808080808080, avoid li

	/* check alignment of str_start */
	andi	a1, a0, ~0b111		# str_ptr = str_start rounded down to 8
	ld	a2, (a1)
	beq	a1, a0, .Lhas_zero	# already aligned: no head masking needed

	/*
	 * fill bytes before str_start with non-zero
	 *
	 * With k = str_start & 7 (1..7 here), the k low-order bytes of iter
	 * lie before the string.  srl only consumes the low 6 bits of its
	 * shift amount, so t3 acts as (64 - 8*k) and
	 * 0x0101010101010101 >> (64 - 8*k) has 0x01 in exactly those k bytes.
	 * OR-ing it in makes them non-zero without touching string bytes.
	 */
	slli	t2, a0, 3		# t2 = str_start * 8 (low 6 bits = 8*k)
	addi	t3, t2, -64
	neg	t3, t3			# t3 = 64 - 8*k (mod 64)
	srl	t3, t0, t3		# t3 = 0x01 in each of the k leading bytes
	or	a2, a2, t3

	/* unrolled iteration of haszero */
	not	t2, a2
	sub	a2, a2, t0
	and	a2, a2, t2
	and	a2, a2, t1

	bnez	a2, .Lfind_zero

.Lloop_has_zero:
	ld	a2, 8(a1)
	addi	a1, a1, 8		# move ptr to next 8byte
.Lhas_zero:
	not	t2, a2			# t2 = ~iter
	sub	a2, a2, t0		# iter - 0x0101010101010101
	and	a2, a2, t2		# & ~iter
	and	a2, a2, t1		# & 0x8080808080808080

	beqz	a2, .Lloop_has_zero	# no zero byte in this doubleword

.Lfind_zero:
	/* use (iter & -iter) to isolate lowest set bit */
	sub	a3, zero, a2		# a3 = -iter
	and	t1, a2, a3		# t1 = (iter & -iter) = 0x80 << (8*k)

	li	t0, 0x0001020304050607
	srli	t1, t1, 7		# t1 = 1 << (8*k)
	/*
	 * lowest set bit is 2^(8*k)
	 * multiplying by it shifts the idx array in t0 by k bytes to the left
	 */
	mul	t1, t1, t0
	/* highest byte contains idx of first zero */
	srli	t1, t1, 56		# t1 = k, byte offset of the NUL in iter

	add	a1, a1, t1		# a1 = address of the terminating NUL
	sub	a0, a1, a0		# length = &NUL - str_start
	ret
END(strlen)