Path: blob/main/contrib/arm-optimized-routines/string/arm/strcpy.c
39556 views
/*
 * strcpy
 *
 * Copyright (c) 2008-2020, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* NOTE(review): compilers following the Arm C Language Extensions are
   expected to define __thumb__ whenever __thumb2__ is defined, in which
   case this condition can never hold and the whole file compiles to
   nothing.  Confirm whether that is intentional before relying on
   __strcpy_arm being available.  The condition is left unchanged here.  */
#if defined (__thumb2__) && !defined (__thumb__)

/* For GLIBC:
#include <string.h>
#include <memcopy.h>

#undef strcpy
*/

/* Operands used by the "does this word contain a zero byte?" test:
     ((w - 0x01010101) & ~w) & 0x80808080  !=  0
   For Thumb-2 both constants are written as immediates.  Otherwise REG
   is expected to hold 0x01010101 (built in r5 below) and the second
   constant is formed as REG shifted left by 7.  */
#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif

/* Copy the NUL-terminated string at SRC, including the terminator, to DST.

   The function is naked, so the assembly below is the entire body and
   performs its own return.

   Register usage:
     r0      dst on entry; never written, so it is still dst on return
     r1      read cursor into src
     ip      write cursor into dst
     r2      scratch (alignment tests, zero-byte tests, byte/halfword data)
     r3, r4  data words; r4 is saved on the stack while the word loop runs
     r5      0x01010101 (non-Thumb-2 builds only; saved and restored)

   Numeric labels:
     5  both pointers word-aligned: set up and enter the word-copy path
     2  inner loop, two words per iteration
     1  (first)  store the bytes of r3 one at a time up to and including
                 the NUL, restore saved registers, return
     3  pointers share a non-zero offset within a word: copy a byte and/or
        a halfword to reach word alignment, then go to 5
     1  (second) halfword step of the alignment prologue
     4  pointers have different offsets within a word: plain byte loop  */
char* __attribute__((naked))
__strcpy_arm (char* dst, const char* src)
{
  __asm__ (
       "pld [r1, #0]\n\t"
       "eor r2, r0, r1\n\t"
       "mov ip, r0\n\t"
       /* Low two bits of (dst ^ src) differ: no common word alignment.  */
       "tst r2, #3\n\t"
       "bne 4f\n\t"
       /* Common alignment, but not yet on a word boundary.  */
       "tst r1, #3\n\t"
       "bne 3f\n"
  "5:\n\t"
# ifndef __thumb2__
       /* Build 0x01010101 in r5 for magic1/magic2.  */
       "str r5, [sp, #-4]!\n\t"
       "mov r5, #0x01\n\t"
       "orr r5, r5, r5, lsl #8\n\t"
       "orr r5, r5, r5, lsl #16\n\t"
# endif

       "str r4, [sp, #-4]!\n\t"
       /* If bit 2 of src is clear, go straight to the inner loop with the
          first word already in r3.  Otherwise check (and, if it holds no
          zero byte, store) one word here before entering the loop.  */
       "tst r1, #4\n\t"
       "ldr r3, [r1], #4\n\t"
       "beq 2f\n\t"
       "sub r2, r3, "magic1(r5)"\n\t"
       "bics r2, r2, r3\n\t"
       "tst r2, "magic2(r5)"\n\t"
       "itt eq\n\t"
       "streq r3, [ip], #4\n\t"
       "ldreq r3, [r1], #4\n"
       "bne 1f\n\t"
       /* Inner loop.  We now know that r1 is 64-bit aligned, so we
          can safely fetch up to two words.  This allows us to avoid
          load stalls.  */
       ".p2align 2\n"
  "2:\n\t"
       "pld [r1, #8]\n\t"
       "ldr r4, [r1], #4\n\t"
       /* Zero-byte test on r3; the test on r4 is started before the
          branch so the two are interleaved.  */
       "sub r2, r3, "magic1(r5)"\n\t"
       "bics r2, r2, r3\n\t"
       "tst r2, "magic2(r5)"\n\t"
       "sub r2, r4, "magic1(r5)"\n\t"
       "bne 1f\n\t"
       "str r3, [ip], #4\n\t"
       "bics r2, r2, r4\n\t"
       "tst r2, "magic2(r5)"\n\t"
       "itt eq\n\t"
       "ldreq r3, [r1], #4\n\t"
       "streq r4, [ip], #4\n\t"
       "beq 2b\n\t"
       /* r4 holds the terminator: finish it through the byte loop.  */
       "mov r3, r4\n"
  "1:\n\t"
       /* Store the bytes of r3 in memory order until a zero byte has
          been written.  */
# ifdef __ARMEB__
       "rors r3, r3, #24\n\t"
# endif
       "strb r3, [ip], #1\n\t"
       "tst r3, #0xff\n\t"
# ifdef __ARMEL__
       "ror r3, r3, #8\n\t"
# endif
       "bne 1b\n\t"
       "ldr r4, [sp], #4\n\t"
# ifndef __thumb2__
       "ldr r5, [sp], #4\n\t"
# endif
       "BX LR\n"

       /* Strings have the same offset from word alignment, but it's
          not zero.  */
  "3:\n\t"
       "tst r1, #1\n\t"
       "beq 1f\n\t"
       "ldrb r2, [r1], #1\n\t"
       "strb r2, [ip], #1\n\t"
       "cmp r2, #0\n\t"
       "it eq\n"
       "BXEQ LR\n"
  "1:\n\t"
       "tst r1, #2\n\t"
       "beq 5b\n\t"
       "ldrh r2, [r1], #2\n\t"
       /* If the first byte (in memory order) is non-zero, store the whole
          halfword and then test the second byte; otherwise store only the
          zero byte.  NE after this sequence means neither byte was zero.
          The conditional stores use unified (UAL) spelling, with the
          condition after the size suffix.  */
# ifdef __ARMEB__
       "tst r2, #0xff00\n\t"
       "iteet ne\n\t"
       "strhne r2, [ip], #2\n\t"
       "lsreq r2, r2, #8\n\t"
       "strbeq r2, [ip]\n\t"
       "tstne r2, #0xff\n\t"
# else
       "tst r2, #0xff\n\t"
       "itet ne\n\t"
       "strhne r2, [ip], #2\n\t"
       "strbeq r2, [ip]\n\t"
       "tstne r2, #0xff00\n\t"
# endif
       "bne 5b\n\t"
       "BX LR\n"

       /* src and dst do not have a common word-alignment.  Fall back to
          byte copying.  */
  "4:\n\t"
       "ldrb r2, [r1], #1\n\t"
       "strb r2, [ip], #1\n\t"
       "cmp r2, #0\n\t"
       "bne 4b\n\t"
       "BX LR");
}
/* For GLIBC: libc_hidden_builtin_def (strcpy) */

#endif /* defined (__thumb2__) && !defined (__thumb__) */