/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <[email protected]>
 */

#include <machine/asm.h>

.weak rindex
.set rindex, strrchr

/*
 * char *strrchr(const char *s, int c)
 *
 * Returns a pointer to the last byte equal to (char)c in the NUL-terminated
 * string s (the terminator itself counts, so c == 0 yields the address of the
 * terminator), or NULL when there is no such byte.
 *
 * In:
 *	a0 - const char *s
 *	a1 - int c
 * Out:
 *	a0 - pointer to the last match, or 0
 * Clobbers:
 *	a1-a7, t0-t2 (all caller-saved); no stack usage.
 *
 * Requirements visible in the code: RV64 (64-bit `ld` and constants), the M
 * extension (`mul`), and little-endian byte order (byte N of a loaded word is
 * treated as the byte at address ptr_align + N).
 *
 * The string is scanned one aligned 8-byte word at a time.  Because the
 * first word is loaded from the aligned address at or below s, it may contain
 * up to seven bytes that precede the string; those bytes are forced to 0xFF
 * before any detection is done (see below).
 */
ENTRY(strrchr)
	/*
	 * a0 - const char *ptr_align
	 * a1 - temporary
	 * a2 - temporary
	 * a3 - temporary
	 * a4 - temporary
	 * a5 - const char[8] cccccccc
	 * a6 - const uint64_t *save_align
	 * a7 - const uint64_t save_iter
	 * t0 - const uint64_t REP8_0X01
	 * t1 - const uint64_t REP8_0X80
	 * t2 - const uint64_t head_mask
	 */

	/*
	 * save_align = 0
	 * save_iter = 0xFFFFFFFFFFFFFF00
	 * REP8_0X01 = 0x0101010101010101
	 * REP8_0X80 = REP8_0X01 << 7
	 * cccccccc = (char)c * REP8_0X01
	 * head_mask = ~(-1 << ((str % 8) * 8))
	 * ptr_align = str - str % 8
	 *
	 * The initial save_align/save_iter pair makes .Lfind_char produce
	 * 0 + 0 == NULL when no match is ever recorded.
	 */
	li	t0, 0x01010101
	li	a6, 0
	slli	a2, a0, 3		/* sll uses only the low 6 bits: (str % 8) * 8 */
	slli	t1, t0, 32
	li	a7, 0xFFFFFFFFFFFFFF00
	or	t0, t0, t1
	andi	a1, a1, 0xFF
	li	t2, -1
	slli	t1, t0, 7
	andi	a0, a0, ~0b111
	sll	t2, t2, a2
	mul	a5, a1, t0
	not	t2, t2			/* 0xFF in every byte that precedes str */

	/*
	 * First word.  Bytes in front of the string must be neutralised
	 * *before* the zero-byte detection: the detector propagates a borrow
	 * upwards, so a 0x00 byte (in data or in iter) just below the start
	 * of the string would otherwise raise a false hit in the first byte
	 * of the string when that byte is 0x01.  Masking the detector output
	 * afterwards removes the real hit but keeps the false one.  Forcing
	 * the leading bytes to 0xFF in both words avoids the borrow and also
	 * keeps .Lfind_char from ever selecting a byte in front of the string.
	 */
	ld	a1, 0(a0)		/* a1 -> data = *ptr_align */
	xor	a2, a1, a5		/* a2 -> iter = data ^ cccccccc */
	or	a1, a1, t2		/* data = data | head_mask */
	or	a2, a2, t2		/* iter = iter | head_mask */
	j	.Lscan_word

.Lloop:					/* do { */
	ld	a1, 0(a0)		/* a1 -> data = *ptr_align */
	xor	a2, a1, a5		/* a2 -> iter = data ^ cccccccc */

.Lscan_word:
	/*
	 * hz has bit 7 set in the lowest zero byte of data (higher bits may
	 * be spurious and are never used on their own).  mask_end covers
	 * every byte up to and including that terminator, or the whole word
	 * when hz == 0.  hc ends up non-zero iff iter has a zero byte, i.e.
	 * data has a byte equal to c, at or below the terminator.
	 */
	not	a3, a1			/* a3 -> nhz = ~data */
	sub	a1, a1, t0		/* a1 -> hz = data - REP8_0X01 */
	not	a4, a2			/* a4 -> nhc = ~iter */
	and	a1, a1, a3		/* hz = hz & nhz */
	sub	a3, a2, t0		/* a3 -> hc = iter - REP8_0X01 */
	and	a1, a1, t1		/* hz = hz & REP8_0X80 */
	and	a3, a3, a4		/* hc = hc & nhc */
	addi	a4, a1, -1		/* a4 -> mask_end = hz - 1 */
	and	a3, a3, t1		/* hc = hc & REP8_0X80 */
	xor	a4, a4, a1		/* mask_end = mask_end ^ hz */
	addi	a0, a0, 8		/* ptr_align = ptr_align + 8 */
	and	a3, a3, a4		/* hc = hc & mask_end */
	not	a4, a4			/* mask_end = ~mask_end */

	beqz	a3, .Lskip_save		/* if(!hc) goto skip_save */
	or	a2, a2, a4		/* iter = iter | mask_end (hide bytes past the NUL) */
	addi	a6, a0, -8		/* save_align = ptr_align - 8 */
	mv	a7, a2			/* save_iter = iter */

.Lskip_save:
	beqz	a1, .Lloop		/* } while(!hz) */

.Lfind_char:
	/*
	 * Locate the highest zero byte of save_iter, i.e. the last match in
	 * the last word that contained one.  Bytes 7..1 are tested
	 * explicitly; if none of them is zero the answer is offset 0, which
	 * is either a real match in byte 0 or, together with
	 * save_align == 0, the NULL result.
	 *
	 * a1 -> iter = save_iter
	 * a2 -> mask_iter = 0xFF00000000000000
	 * a3 -> match_off = 7
	 */
	li	a2, 0xFF
	mv	a1, a7
	slli	a2, a2, 56
	li	a3, 7

	and	a0, a1, a2		/* byte 7 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 6 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 5 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 4 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 3 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 2 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 1 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1		/* match_off = 0 */

.Lret:
	/* return save_align + match_off */
	add	a0, a6, a3
	ret
END(strrchr)