/* Path: blob/main/contrib/arm-optimized-routines/string/arm/memset.S */
/* 39556 views */
/*
 * memset - fill memory with a constant
 *
 * Copyright (c) 2010-2021, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This memset routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.

 */

	.syntax unified
	.arch armv7-a

@ 2011-08-30 david.gilbert@linaro.org
@    Extracted from local git 2f11b436

@ This lets us check a flag in a 00/ff byte easily in either endianness.
@ The routine below does not reference it.  The expansion and the argument
@ are parenthesised so the macro stays correct inside larger expressions.
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1 << (31 - ((c) * 8)))
#else
#define CHARTSTMASK(c) (1 << ((c) * 8))
#endif
	.thumb

@ ---------------------------------------------------------------------------
@ __memset_arm
@
@ In:    r0 = address
@        r1 = character (only the low 8 bits end up in memory)
@        r2 = count
@ Out:   r0 = original address (r0 is never written)
@ Uses:  r1, r2, r3 and the flags as scratch.  r4-r7 are pushed on entry to
@        the aligned path and popped again before the byte tail.
@
@ Shape: byte stores until the cursor is 8-byte aligned, then 16 bytes per
@        iteration with stmia, then one optional 8-byte store, then a byte
@        loop for whatever is left.
@ ---------------------------------------------------------------------------
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memset_arm
	.type __memset_arm,%function
__memset_arm:
	mov	r3, r0			@ r3 = write cursor; leave r0 alone
	cbz	r2, .Lexit		@ Exit if 0 length

	tst	r0, #7
	beq	.Laligned		@ Already 8-byte aligned

	@ Misaligned start: store single bytes until the cursor reaches an
	@ 8-byte boundary or the count runs out, whichever comes first.
.Lhead_loop:
	strb	r1, [r3], #1
	subs	r2, r2, #1
	tst	r3, #7			@ Z set once the cursor is aligned
	cbz	r2, .Lexit		@ Exit if we hit the end (cbz keeps the flags)
	bne	.Lhead_loop		@ Uses the tst result: still misaligned

.Laligned:
	@ Cursor is 8-byte aligned from here on.
	push	{r4, r5, r6, r7}
	bics	r4, r2, #15		@ r4 = count rounded down to a multiple of 16
	beq	.Lrestore		@ Less than 16 bytes: finish in the byte tail

	@ POSIX says that ch is cast to an unsigned char.  A uxtb has a
	@ 16-bit (two byte) encoding and takes two cycles, where an AND is
	@ four bytes but one cycle.
	and	r1, #0xFF
	orr	r1, r1, r1, lsl #8	@ Same character into all bytes
	orr	r1, r1, r1, lsl #16
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1

.Lblock_loop:
	subs	r4, r4, #16
	stmia	r3!, {r1, r5, r6, r7}	@ 16 bytes per pass; flags untouched
	bne	.Lblock_loop		@ Uses the subs result
	and	r2, r2, #15		@ r2 = bytes remaining after the blocks

	@ At this point the cursor is still aligned and there are at most
	@ 15 bytes left to write.  Avoid some of the byte-at-a-time work by
	@ storing one 8-byte chunk when bit 3 of the remainder is set.
	tst	r2, #8
	itt	ne
	subne	r2, r2, #8
	stmiane	r3!, {r1, r5}

.Lrestore:
	pop	{r4, r5, r6, r7}
	cbz	r2, .Lexit

	@ Write any remaining bytes one at a time.  strb stores only the low
	@ byte of r1, so this works whether or not r1 was replicated above.
.Ltail_loop:
	subs	r2, r2, #1
	strb	r1, [r3], #1		@ strb leaves the subs flags intact
	bne	.Ltail_loop

.Lexit:
	bx	lr			@ r0 still holds the original address
	.size	__memset_arm, . - __memset_arm