/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <[email protected]>
 */

#include <machine/asm.h>

/*
 * size_t strnlen(const char *s, size_t maxlen)
 *
 * In:
 *   a0 - const char *s
 *   a1 - size_t maxlen
 * Out:
 *   a0 - index of the first NUL byte in s, or maxlen if no NUL byte is
 *        found among the first maxlen bytes.
 *
 * Registers written: a0, a2-a6, t0-t4.  The stack is not used.
 *
 * The string is scanned one aligned 64-bit word at a time (two words per
 * iteration in the main loop).  Because whole aligned words are loaded,
 * bytes that share an aligned word with the string (before s, after the
 * terminator, or after s + maxlen) are read as well, but no load ever
 * touches a word that lies completely outside [s, s + maxlen).
 *
 * The code uses ld, 64-bit constants and mul, and maps the lowest set flag
 * bit to the lowest address, i.e. it is written for little-endian RV64
 * with the M extension.
 *
 * Any maxlen is accepted, including values for which s + maxlen wraps
 * around the address space (e.g. SIZE_MAX): the scan limit is clamped to
 * the top of the address space and the final minimum is unsigned.
 */
ENTRY(strnlen)
	/*
	 * a0 - const char *s;
	 * a1 - size_t maxlen;
	 * a2 - uint64_t *ptr;
	 * a3 - char iter[8];
	 * a4 - uint64_t *end_align;
	 * a5 - uint64_t *end_unroll;
	 * a6 - second word of the unrolled loop
	 * t0 - 0x0101010101010101 (later reused as scratch in .Lfind_zero)
	 * t1 - 0x8080808080808080
	 * t2 - mask_start, then scratch
	 * t3, t4 - overflow flags during setup; t3 is scratch in the loop
	 */

	/* maxlen == 0: nothing may be examined, result is 0 (== maxlen) */
	beqz	a1, .Lnot_found

	/* ptr = s & ~0b111 */
	/* t0 = 0x0101010101010101 */
	/* t1 = 0x8080808080808080 */
	/*
	 * end_align = (s + maxlen + 7) & ~0b111, computed modulo 2^64.
	 * If either addition carries out of 64 bits the real limit lies at or
	 * beyond the top of the address space; end_align is then forced to 0,
	 * which stands for 2^64, so that end_align - ptr stays the (non-zero)
	 * distance to the top of the address space instead of collapsing to a
	 * small or zero value.
	 */
	/*
	 * mask_start = t0 >> ((-s.value) << 3)
	 * srl only uses the low 6 bits of the shift amount, so for an
	 * unaligned s this leaves 0x01 in each of the low (s & 7) bytes.
	 */
	add	a4, a0, a1
	li	t0, 0x01010101
	sltu	t3, a4, a0		/* t3 = 1 if s + maxlen wrapped */
	addi	a4, a4, 7
	slli	t1, t0, 32
	sltiu	t4, a4, 7		/* t4 = 1 if the +7 round-up wrapped */
	neg	t2, a0
	andi	a4, a4, ~0b111
	or	t3, t3, t4
	or	t0, t0, t1
	addi	t3, t3, -1		/* 0 if wrapped, all-ones otherwise */
	slli	t2, t2, 3
	andi	a2, a0, ~0b111
	and	a4, a4, t3		/* end_align = 0 (i.e. 2^64) on wrap */
	slli	t1, t0, 7
	srl	t2, t0, t2

	/* if pointer is aligned skip to loop */
	beq	a0, a2, .Lskip_start

	/* iter = *ptr */
	ld	a3, (a2)

	/*
	 * iter = iter | mask_start
	 * Makes the bytes that precede s inside the first word non-zero so
	 * they can never be reported as the terminator.
	 */
	or	a3, a3, t2

	/*
	 * has_zero: (iter - 0x01..01) & ~iter & 0x80..80
	 * Non-zero iff iter contains a zero byte; the lowest set bit is the
	 * 0x80 flag of the first (lowest-addressed) zero byte.
	 */
	not	t2, a3
	sub	a3, a3, t0
	and	t2, t2, t1
	and	a3, a3, t2

	addi	a2, a2, 8
	bnez	a3, .Lfind_zero

.Lskip_start:
	/*
	 * end_unroll = ptr + ((end_align - ptr) & ~0b1111)
	 * i.e. the last position reachable in whole 16-byte steps.
	 */
	sub	t2, a4, a2
	andi	t2, t2, ~0b1111
	add	a5, a2, t2

	/* while (ptr != end_unroll) */
	beq	a2, a5, .Lskip_loop
.Lloop:
	ld	a3, (a2)
	ld	a6, 8(a2)

	/* has_zero for both words, interleaved */
	not	t2, a3
	not	t3, a6
	sub	a3, a3, t0
	sub	a6, a6, t0
	and	t2, t2, t1
	and	t3, t3, t1
	and	a3, a3, t2
	and	a6, a6, t3

	/* ptr is advanced before each test; .Lfind_zero steps it back */
	addi	a2, a2, 8
	bnez	a3, .Lfind_zero

	mv	a3, a6

	addi	a2, a2, 8
	bnez	a3, .Lfind_zero

	bne	a2, a5, .Lloop

.Lskip_loop:

	/* at most one aligned word is left between end_unroll and end_align */
	beq	a2, a4, .Lnot_found

	ld	a3, (a2)

	/* has_zero */
	not	t2, a3
	sub	a3, a3, t0
	and	t2, t2, t1
	and	a3, a3, t2

	addi	a2, a2, 8
	beqz	a3, .Lnot_found

.Lfind_zero:

	/* move ptr back to the word that produced the flags in a3 */
	addi	a2, a2, -8

	/* isolate lowest set bit */
	neg	t0, a3
	and	a3, a3, t0

	li	t0, 0x0001020304050607
	srli	a3, a3, 7

	/*
	 * lowest set bit is 2^(8*k)
	 * multiplying by it shifts the idx array in t0 by k bytes to the left
	 */
	mul	a3, a3, t0

	/* highest byte contains idx of first zero */
	srli	a3, a3, 56

	/* zero_idx = (ptr - s) + idx */
	sub	a2, a2, a0
	add	a2, a2, a3

	/*
	 * min(zero_idx, maxlen), branch-free and unsigned:
	 * return maxlen + ((zero_idx - maxlen) & -(zero_idx < maxlen))
	 * An unsigned compare is required because maxlen may be >= 2^63.
	 */
	sltu	t1, a2, a1
	sub	a2, a2, a1
	neg	t1, t1
	and	a2, a2, t1
	add	a0, a1, a2

	ret

.Lnot_found:
	/* no NUL byte within the first maxlen bytes */
	mv	a0, a1
	ret

END(strnlen)