Path: blob/main/contrib/arm-optimized-routines/string/arm/memchr.S
39556 views
/*
 * memchr - scan memory for a character
 *
 * Copyright (c) 2010-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This __memchr_arm routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.   It has a fast path for short sizes, and has
   an optimised path for large data sets; the worst case is finding the
   match early in a large data set.

 */

@ 2011-02-07 david.gilbert@linaro.org
@    Extracted from local git a5b438d861
@ 2011-07-14 david.gilbert@linaro.org
@    Import endianness fix from local git ea786f1b
@ 2011-12-07 david.gilbert@linaro.org
@    Removed unneeded cbz from align loop

        .syntax unified
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'M'
        /* keep config inherited from -march= */
#else
        .arch armv7-a
#endif

@ this lets us check a flag in a 00/ff byte easily in either endianness
@ CHARTSTMASK(c) selects one bit inside byte c (in memory order) of a loaded word
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
        .thumb
#include "asmdefs.h"


@ ---------------------------------------------------------------------------
@ __memchr_arm
@
@ In:    r0 = start of memory to scan
@        r1 = character to look for (only the low 8 bits are used)
@        r2 = length in bytes (treated as unsigned)
@ Out:   r0 = pointer to the first matching byte, or NULL if not found
@ Uses:  r1-r3 and the NZCV/GE flags as scratch; r4-r7 are pushed and popped
@        around the double-word loop (16 bytes of stack while it runs).
@ Notes: 'prologue' and 'epilogue' are macros that are not defined in this
@        file (presumably they come from asmdefs.h - confirm there).  The
@        layout below relies on 'epilogue' returning to the caller, since
@        code for another label follows each use.
@ ---------------------------------------------------------------------------
        .thumb_func
        .align 2
        .p2align 4,,15
        .global __memchr_arm
        .type __memchr_arm,%function
        .fnstart
        .cfi_startproc
__memchr_arm:
        @ r0 = start of memory to scan
        @ r1 = character to look for
        @ r2 = length
        @ returns r0 = pointer to character or NULL if not found
        prologue
        and     r1,r1,#0xff     @ Don't think we can trust the caller to actually pass a char

        cmp     r2,#16          @ If it's short don't bother with anything clever
        blo     20f             @ Unsigned compare: the length must not be read as negative,
                                @ otherwise lengths >= 2^31 would miss the fast path

        tst     r0, #7          @ If it's already aligned skip the next bit
        beq     10f

        @ Work up to an aligned point
        @ At most 7 bytes are consumed here and the length was >= 16 on entry,
        @ so at least 9 bytes remain when the loop exits without a match.
5:
        ldrb    r3, [r0],#1
        subs    r2, r2, #1
        cmp     r3, r1
        beq     50f             @ If it matches exit found
        tst     r0, #7
        bne     5b              @ If not aligned yet then do next byte

10:
        @ At this point, we are aligned, we know we have at least 8 bytes to work with
        push    {r4,r5,r6,r7}
        .cfi_adjust_cfa_offset 16
        .cfi_rel_offset 4, 0
        .cfi_rel_offset 5, 4
        .cfi_rel_offset 6, 8
        .cfi_rel_offset 7, 12
        orr     r1, r1, r1, lsl #8      @ expand the match word across to all bytes
        orr     r1, r1, r1, lsl #16
        bic     r4, r2, #7      @ Number of bytes covered by whole double words (length rounded down to 8)
        mvns    r7, #0          @ all F's
        movs    r3, #0

        @ Main loop: test 8 bytes per iteration.
        @ r4 = bytes still to test here (multiple of 8), r1 = target in every byte,
        @ r3 = 0, r7 = 0xffffffff.
15:
        ldmia   r0!,{r5,r6}
        subs    r4, r4, #8      @ Z flag from here is consumed by the bne at the loop end
        eor     r5,r5, r1       @ Get it so that r5,r6 have 00's where the bytes match the target
        eor     r6,r6, r1
        uadd8   r5, r5, r7      @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
        sel     r5, r3, r7      @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
        uadd8   r6, r6, r7      @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
        sel     r6, r5, r7      @ chained....bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
                                @ (a non-matching byte takes the first word's result, so r6 != 0
                                @  when either word contained a match)
        cbnz    r6, 60f
        bne     15b             @ (Flags from the subs above) If not run out of bytes then go around again

        pop     {r4,r5,r6,r7}
        .cfi_restore 7
        .cfi_restore 6
        .cfi_restore 5
        .cfi_restore 4
        .cfi_adjust_cfa_offset -16
        and     r1,r1,#0xff     @ Get r1 back to a single character from the expansion above
        and     r2,r2,#7        @ Leave the count remaining as the number after the double words have been done

20:
        cbz     r2, 40f         @ 0 length or hit the end already then not found

21:     @ Post aligned section, or just a short call
        ldrb    r3,[r0],#1
        subs    r2,r2,#1
        eor     r3,r3,r1        @ r3 = 0 if match - doesn't break flags from sub
        cbz     r3, 50f
        bne     21b             @ on r2 flags

40:
        .cfi_remember_state
        movs    r0,#0           @ not found
        epilogue

50:
        .cfi_restore_state
        .cfi_remember_state
        subs    r0,r0,#1        @ found: undo the post-increment of the byte load
        epilogue

60:     @ We're here because the fast path found a hit - now we have to track down exactly which word it was
        @ r0 points to the start of the double word after the one that was tested
        @ r5 has the 00/ff pattern for the first word, r6 has the chained value
        .cfi_restore_state      @ Standard post-prologue state
        .cfi_adjust_cfa_offset 16
        .cfi_rel_offset 4, 0
        .cfi_rel_offset 5, 4
        .cfi_rel_offset 6, 8
        .cfi_rel_offset 7, 12
        cmp     r5, #0
        itte    eq
        moveq   r5, r6          @ the end is in the 2nd word
        subeq   r0,r0,#3        @ Points to 2nd byte of 2nd word
        subne   r0,r0,#7        @ or 2nd byte of 1st word

        @ r0 currently points to the 2nd byte of the word containing the hit
        @ (one past the 1st byte; the final subs at 61 removes that bias)
        tst     r5, # CHARTSTMASK(0)    @ 1st character
        bne     61f
        adds    r0,r0,#1
        tst     r5, # CHARTSTMASK(1)    @ 2nd character
        ittt    eq
        addeq   r0,r0,#1
        tsteq   r5, # (3<<15)           @ 2nd & 3rd character
        @ (3<<15) covers bit 15 = CHARTSTMASK(2) big-endian and bit 16 =
        @ CHARTSTMASK(2) little-endian; in each case the other bit lies in the
        @ 2nd character, already known to be 00 here, so this tests the 3rd.
        @ If not the 3rd must be the last one
        addeq   r0,r0,#1

61:
        pop     {r4,r5,r6,r7}
        .cfi_restore 7
        .cfi_restore 6
        .cfi_restore 5
        .cfi_restore 4
        .cfi_adjust_cfa_offset -16
        subs    r0,r0,#1
        epilogue
        .cfi_endproc
        .cantunwind
        .fnend

        .size   __memchr_arm, . - __memchr_arm