/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */

/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL WASABI SYSTEMS, INC28* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR29* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF30* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS31* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN32* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)33* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE34* POSSIBILITY OF SUCH DAMAGE.35*/3637#include <machine/asm.h>38.syntax unified3940/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */41ENTRY(memcpy)42pld [r1]43cmp r2, #0x0c44ble .Lmemcpy_short /* <= 12 bytes */45mov r3, r0 /* We must not clobber r0 */4647/* Word-align the destination buffer */48ands ip, r3, #0x03 /* Already word aligned? */49beq .Lmemcpy_wordaligned /* Yup */50cmp ip, #0x0251ldrb ip, [r1], #0x0152sub r2, r2, #0x0153strb ip, [r3], #0x0154ldrble ip, [r1], #0x0155suble r2, r2, #0x0156strble ip, [r3], #0x0157ldrblt ip, [r1], #0x0158sublt r2, r2, #0x0159strblt ip, [r3], #0x016061/* Destination buffer is now word aligned */62.Lmemcpy_wordaligned:63ands ip, r1, #0x03 /* Is src also word-aligned? */64bne .Lmemcpy_bad_align /* Nope. Things just got bad */6566/* Quad-align the destination buffer */67tst r3, #0x07 /* Already quad aligned? 
*/68ldrne ip, [r1], #0x0469stmfd sp!, {r4-r9} /* Free up some registers */70subne r2, r2, #0x0471strne ip, [r3], #0x047273/* Destination buffer quad aligned, source is at least word aligned */74subs r2, r2, #0x8075blt .Lmemcpy_w_lessthan1287677/* Copy 128 bytes at a time */78.Lmemcpy_w_loop128:79ldr r4, [r1], #0x04 /* LD:00-03 */80ldr r5, [r1], #0x04 /* LD:04-07 */81pld [r1, #0x18] /* Prefetch 0x20 */82ldr r6, [r1], #0x04 /* LD:08-0b */83ldr r7, [r1], #0x04 /* LD:0c-0f */84ldr r8, [r1], #0x04 /* LD:10-13 */85ldr r9, [r1], #0x04 /* LD:14-17 */86strd r4, [r3], #0x08 /* ST:00-07 */87ldr r4, [r1], #0x04 /* LD:18-1b */88ldr r5, [r1], #0x04 /* LD:1c-1f */89strd r6, [r3], #0x08 /* ST:08-0f */90ldr r6, [r1], #0x04 /* LD:20-23 */91ldr r7, [r1], #0x04 /* LD:24-27 */92pld [r1, #0x18] /* Prefetch 0x40 */93strd r8, [r3], #0x08 /* ST:10-17 */94ldr r8, [r1], #0x04 /* LD:28-2b */95ldr r9, [r1], #0x04 /* LD:2c-2f */96strd r4, [r3], #0x08 /* ST:18-1f */97ldr r4, [r1], #0x04 /* LD:30-33 */98ldr r5, [r1], #0x04 /* LD:34-37 */99strd r6, [r3], #0x08 /* ST:20-27 */100ldr r6, [r1], #0x04 /* LD:38-3b */101ldr r7, [r1], #0x04 /* LD:3c-3f */102strd r8, [r3], #0x08 /* ST:28-2f */103ldr r8, [r1], #0x04 /* LD:40-43 */104ldr r9, [r1], #0x04 /* LD:44-47 */105pld [r1, #0x18] /* Prefetch 0x60 */106strd r4, [r3], #0x08 /* ST:30-37 */107ldr r4, [r1], #0x04 /* LD:48-4b */108ldr r5, [r1], #0x04 /* LD:4c-4f */109strd r6, [r3], #0x08 /* ST:38-3f */110ldr r6, [r1], #0x04 /* LD:50-53 */111ldr r7, [r1], #0x04 /* LD:54-57 */112strd r8, [r3], #0x08 /* ST:40-47 */113ldr r8, [r1], #0x04 /* LD:58-5b */114ldr r9, [r1], #0x04 /* LD:5c-5f */115strd r4, [r3], #0x08 /* ST:48-4f */116ldr r4, [r1], #0x04 /* LD:60-63 */117ldr r5, [r1], #0x04 /* LD:64-67 */118pld [r1, #0x18] /* Prefetch 0x80 */119strd r6, [r3], #0x08 /* ST:50-57 */120ldr r6, [r1], #0x04 /* LD:68-6b */121ldr r7, [r1], #0x04 /* LD:6c-6f */122strd r8, [r3], #0x08 /* ST:58-5f */123ldr r8, [r1], #0x04 /* LD:70-73 */124ldr r9, [r1], #0x04 /* LD:74-77 */125strd 
r4, [r3], #0x08 /* ST:60-67 */126ldr r4, [r1], #0x04 /* LD:78-7b */127ldr r5, [r1], #0x04 /* LD:7c-7f */128strd r6, [r3], #0x08 /* ST:68-6f */129strd r8, [r3], #0x08 /* ST:70-77 */130subs r2, r2, #0x80131strd r4, [r3], #0x08 /* ST:78-7f */132bge .Lmemcpy_w_loop128133134.Lmemcpy_w_lessthan128:135adds r2, r2, #0x80 /* Adjust for extra sub */136ldmfdeq sp!, {r4-r9}137bxeq lr /* Return now if done */138subs r2, r2, #0x20139blt .Lmemcpy_w_lessthan32140141/* Copy 32 bytes at a time */142.Lmemcpy_w_loop32:143ldr r4, [r1], #0x04144ldr r5, [r1], #0x04145pld [r1, #0x18]146ldr r6, [r1], #0x04147ldr r7, [r1], #0x04148ldr r8, [r1], #0x04149ldr r9, [r1], #0x04150strd r4, [r3], #0x08151ldr r4, [r1], #0x04152ldr r5, [r1], #0x04153strd r6, [r3], #0x08154strd r8, [r3], #0x08155subs r2, r2, #0x20156strd r4, [r3], #0x08157bge .Lmemcpy_w_loop32158159.Lmemcpy_w_lessthan32:160adds r2, r2, #0x20 /* Adjust for extra sub */161ldmfdeq sp!, {r4-r9}162bxeq lr /* Return now if done */163164and r4, r2, #0x18165rsbs r4, r4, #0x18166addne pc, pc, r4, lsl #1167nop168169/* At least 24 bytes remaining */170ldr r4, [r1], #0x04171ldr r5, [r1], #0x04172sub r2, r2, #0x08173strd r4, [r3], #0x08174175/* At least 16 bytes remaining */176ldr r4, [r1], #0x04177ldr r5, [r1], #0x04178sub r2, r2, #0x08179strd r4, [r3], #0x08180181/* At least 8 bytes remaining */182ldr r4, [r1], #0x04183ldr r5, [r1], #0x04184subs r2, r2, #0x08185strd r4, [r3], #0x08186187/* Less than 8 bytes remaining */188ldmfd sp!, {r4-r9}189bxeq lr /* Return now if done */190subs r2, r2, #0x04191ldrge ip, [r1], #0x04192strge ip, [r3], #0x04193bxeq lr /* Return now if done */194addlt r2, r2, #0x04195ldrb ip, [r1], #0x01196cmp r2, #0x02197ldrbge r2, [r1], #0x01198strb ip, [r3], #0x01199ldrbgt ip, [r1]200strbge r2, [r3], #0x01201strbgt ip, [r3]202bx lr203204205/*206* At this point, it has not been possible to word align both buffers.207* The destination buffer is word aligned, but the source buffer is not.208*/209.Lmemcpy_bad_align:210stmfd sp!, 
{r4-r7}211bic r1, r1, #0x03212cmp ip, #2213ldr ip, [r1], #0x04214bgt .Lmemcpy_bad3215beq .Lmemcpy_bad2216b .Lmemcpy_bad1217218.Lmemcpy_bad1_loop16:219mov r4, ip, lsr #8220ldr r5, [r1], #0x04221pld [r1, #0x018]222ldr r6, [r1], #0x04223ldr r7, [r1], #0x04224ldr ip, [r1], #0x04225orr r4, r4, r5, lsl #24226mov r5, r5, lsr #8227orr r5, r5, r6, lsl #24228mov r6, r6, lsr #8229orr r6, r6, r7, lsl #24230mov r7, r7, lsr #8231orr r7, r7, ip, lsl #24232str r4, [r3], #0x04233str r5, [r3], #0x04234str r6, [r3], #0x04235str r7, [r3], #0x04236.Lmemcpy_bad1:237subs r2, r2, #0x10238bge .Lmemcpy_bad1_loop16239240adds r2, r2, #0x10241ldmfdeq sp!, {r4-r7}242bxeq lr /* Return now if done */243subs r2, r2, #0x04244sublt r1, r1, #0x03245blt .Lmemcpy_bad_done246247.Lmemcpy_bad1_loop4:248mov r4, ip, lsr #8249ldr ip, [r1], #0x04250subs r2, r2, #0x04251orr r4, r4, ip, lsl #24252str r4, [r3], #0x04253bge .Lmemcpy_bad1_loop4254sub r1, r1, #0x03255b .Lmemcpy_bad_done256257.Lmemcpy_bad2_loop16:258mov r4, ip, lsr #16259ldr r5, [r1], #0x04260pld [r1, #0x018]261ldr r6, [r1], #0x04262ldr r7, [r1], #0x04263ldr ip, [r1], #0x04264orr r4, r4, r5, lsl #16265mov r5, r5, lsr #16266orr r5, r5, r6, lsl #16267mov r6, r6, lsr #16268orr r6, r6, r7, lsl #16269mov r7, r7, lsr #16270orr r7, r7, ip, lsl #16271str r4, [r3], #0x04272str r5, [r3], #0x04273str r6, [r3], #0x04274str r7, [r3], #0x04275.Lmemcpy_bad2:276subs r2, r2, #0x10277bge .Lmemcpy_bad2_loop16278279adds r2, r2, #0x10280ldmfdeq sp!, {r4-r7}281bxeq lr /* Return now if done */282subs r2, r2, #0x04283sublt r1, r1, #0x02284blt .Lmemcpy_bad_done285286.Lmemcpy_bad2_loop4:287mov r4, ip, lsr #16288ldr ip, [r1], #0x04289subs r2, r2, #0x04290orr r4, r4, ip, lsl #16291str r4, [r3], #0x04292bge .Lmemcpy_bad2_loop4293sub r1, r1, #0x02294b .Lmemcpy_bad_done295296.Lmemcpy_bad3_loop16:297mov r4, ip, lsr #24298ldr r5, [r1], #0x04299pld [r1, #0x018]300ldr r6, [r1], #0x04301ldr r7, [r1], #0x04302ldr ip, [r1], #0x04303orr r4, r4, r5, lsl #8304mov r5, r5, lsr #24305orr r5, 
r5, r6, lsl #8306mov r6, r6, lsr #24307orr r6, r6, r7, lsl #8308mov r7, r7, lsr #24309orr r7, r7, ip, lsl #8310str r4, [r3], #0x04311str r5, [r3], #0x04312str r6, [r3], #0x04313str r7, [r3], #0x04314.Lmemcpy_bad3:315subs r2, r2, #0x10316bge .Lmemcpy_bad3_loop16317318adds r2, r2, #0x10319ldmfdeq sp!, {r4-r7}320bxeq lr /* Return now if done */321subs r2, r2, #0x04322sublt r1, r1, #0x01323blt .Lmemcpy_bad_done324325.Lmemcpy_bad3_loop4:326mov r4, ip, lsr #24327ldr ip, [r1], #0x04328subs r2, r2, #0x04329orr r4, r4, ip, lsl #8330str r4, [r3], #0x04331bge .Lmemcpy_bad3_loop4332sub r1, r1, #0x01333334.Lmemcpy_bad_done:335ldmfd sp!, {r4-r7}336adds r2, r2, #0x04337bxeq lr338ldrb ip, [r1], #0x01339cmp r2, #0x02340ldrbge r2, [r1], #0x01341strb ip, [r3], #0x01342ldrbgt ip, [r1]343strbge r2, [r3], #0x01344strbgt ip, [r3]345bx lr346347348/*349* Handle short copies (less than 16 bytes), possibly misaligned.350* Some of these are *very* common, thanks to the network stack,351* and so are handled specially.352*/353.Lmemcpy_short:354#ifndef _STANDALONE355add pc, pc, r2, lsl #2356nop357bx lr /* 0x00 */358b .Lmemcpy_bytewise /* 0x01 */359b .Lmemcpy_bytewise /* 0x02 */360b .Lmemcpy_bytewise /* 0x03 */361b .Lmemcpy_4 /* 0x04 */362b .Lmemcpy_bytewise /* 0x05 */363b .Lmemcpy_6 /* 0x06 */364b .Lmemcpy_bytewise /* 0x07 */365b .Lmemcpy_8 /* 0x08 */366b .Lmemcpy_bytewise /* 0x09 */367b .Lmemcpy_bytewise /* 0x0a */368b .Lmemcpy_bytewise /* 0x0b */369b .Lmemcpy_c /* 0x0c */370#endif371.Lmemcpy_bytewise:372mov r3, r0 /* We must not clobber r0 */373ldrb ip, [r1], #0x013741: subs r2, r2, #0x01375strb ip, [r3], #0x01376ldrbne ip, [r1], #0x01377bne 1b378bx lr379380#ifndef _STANDALONE381/******************************************************************************382* Special case for 4 byte copies383*/384#define LMEMCPY_4_LOG2 6 /* 64 bytes */385#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2386LMEMCPY_4_PAD387.Lmemcpy_4:388and r2, r1, #0x03389orr r2, r2, r0, lsl #2390ands r2, r2, #0x0f391sub r3, pc, 
#0x14392addne pc, r3, r2, lsl #LMEMCPY_4_LOG2393394/*395* 0000: dst is 32-bit aligned, src is 32-bit aligned396*/397ldr r2, [r1]398str r2, [r0]399bx lr400LMEMCPY_4_PAD401402/*403* 0001: dst is 32-bit aligned, src is 8-bit aligned404*/405ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */406ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */407mov r3, r3, lsr #8 /* r3 = .210 */408orr r3, r3, r2, lsl #24 /* r3 = 3210 */409str r3, [r0]410bx lr411LMEMCPY_4_PAD412413/*414* 0010: dst is 32-bit aligned, src is 16-bit aligned415*/416ldrh r3, [r1, #0x02]417ldrh r2, [r1]418orr r3, r2, r3, lsl #16419str r3, [r0]420bx lr421LMEMCPY_4_PAD422423/*424* 0011: dst is 32-bit aligned, src is 8-bit aligned425*/426ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */427ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */428mov r3, r3, lsr #24 /* r3 = ...0 */429orr r3, r3, r2, lsl #8 /* r3 = 3210 */430str r3, [r0]431bx lr432LMEMCPY_4_PAD433434/*435* 0100: dst is 8-bit aligned, src is 32-bit aligned436*/437ldr r2, [r1]438strb r2, [r0]439mov r3, r2, lsr #8440mov r1, r2, lsr #24441strb r1, [r0, #0x03]442strh r3, [r0, #0x01]443bx lr444LMEMCPY_4_PAD445446/*447* 0101: dst is 8-bit aligned, src is 8-bit aligned448*/449ldrb r2, [r1]450ldrh r3, [r1, #0x01]451ldrb r1, [r1, #0x03]452strb r2, [r0]453strh r3, [r0, #0x01]454strb r1, [r0, #0x03]455bx lr456LMEMCPY_4_PAD457458/*459* 0110: dst is 8-bit aligned, src is 16-bit aligned460*/461ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */462ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */463strb r2, [r0]464mov r2, r2, lsr #8 /* r2 = ...1 */465orr r2, r2, r3, lsl #8 /* r2 = .321 */466mov r3, r3, lsr #8 /* r3 = ...3 */467strh r2, [r0, #0x01]468strb r3, [r0, #0x03]469bx lr470LMEMCPY_4_PAD471472/*473* 0111: dst is 8-bit aligned, src is 8-bit aligned474*/475ldrb r2, [r1]476ldrh r3, [r1, #0x01]477ldrb r1, [r1, #0x03]478strb r2, [r0]479strh r3, [r0, #0x01]480strb r1, [r0, #0x03]481bx lr482LMEMCPY_4_PAD483484/*485* 1000: dst is 16-bit aligned, src is 32-bit aligned486*/487ldr 
r2, [r1]488strh r2, [r0]489mov r3, r2, lsr #16490strh r3, [r0, #0x02]491bx lr492LMEMCPY_4_PAD493494/*495* 1001: dst is 16-bit aligned, src is 8-bit aligned496*/497ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */498ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */499mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */500strh r1, [r0]501mov r2, r2, lsr #24 /* r2 = ...2 */502orr r2, r2, r3, lsl #8 /* r2 = xx32 */503strh r2, [r0, #0x02]504bx lr505LMEMCPY_4_PAD506507/*508* 1010: dst is 16-bit aligned, src is 16-bit aligned509*/510ldrh r2, [r1]511ldrh r3, [r1, #0x02]512strh r2, [r0]513strh r3, [r0, #0x02]514bx lr515LMEMCPY_4_PAD516517/*518* 1011: dst is 16-bit aligned, src is 8-bit aligned519*/520ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */521ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */522mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */523strh r1, [r0, #0x02]524mov r3, r3, lsl #8 /* r3 = 321. */525orr r3, r3, r2, lsr #24 /* r3 = 3210 */526strh r3, [r0]527bx lr528LMEMCPY_4_PAD529530/*531* 1100: dst is 8-bit aligned, src is 32-bit aligned532*/533ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */534strb r2, [r0]535mov r3, r2, lsr #8536mov r1, r2, lsr #24537strh r3, [r0, #0x01]538strb r1, [r0, #0x03]539bx lr540LMEMCPY_4_PAD541542/*543* 1101: dst is 8-bit aligned, src is 8-bit aligned544*/545ldrb r2, [r1]546ldrh r3, [r1, #0x01]547ldrb r1, [r1, #0x03]548strb r2, [r0]549strh r3, [r0, #0x01]550strb r1, [r0, #0x03]551bx lr552LMEMCPY_4_PAD553554/*555* 1110: dst is 8-bit aligned, src is 16-bit aligned556*/557ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */558ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */559strb r2, [r0]560mov r2, r2, lsr #8 /* r2 = ...1 */561orr r2, r2, r3, lsl #8 /* r2 = .321 */562strh r2, [r0, #0x01]563mov r3, r3, lsr #8 /* r3 = ...3 */564strb r3, [r0, #0x03]565bx lr566LMEMCPY_4_PAD567568/*569* 1111: dst is 8-bit aligned, src is 8-bit aligned570*/571ldrb r2, [r1]572ldrh r3, [r1, #0x01]573ldrb r1, [r1, #0x03]574strb r2, [r0]575strh r3, [r0, #0x01]576strb r1, 
[r0, #0x03]577bx lr578LMEMCPY_4_PAD579580581/******************************************************************************582* Special case for 6 byte copies583*/584#define LMEMCPY_6_LOG2 6 /* 64 bytes */585#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2586LMEMCPY_6_PAD587.Lmemcpy_6:588and r2, r1, #0x03589orr r2, r2, r0, lsl #2590ands r2, r2, #0x0f591sub r3, pc, #0x14592addne pc, r3, r2, lsl #LMEMCPY_6_LOG2593594/*595* 0000: dst is 32-bit aligned, src is 32-bit aligned596*/597ldr r2, [r1]598ldrh r3, [r1, #0x04]599str r2, [r0]600strh r3, [r0, #0x04]601bx lr602LMEMCPY_6_PAD603604/*605* 0001: dst is 32-bit aligned, src is 8-bit aligned606*/607ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */608ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */609mov r2, r2, lsr #8 /* r2 = .210 */610orr r2, r2, r3, lsl #24 /* r2 = 3210 */611mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */612str r2, [r0]613strh r3, [r0, #0x04]614bx lr615LMEMCPY_6_PAD616617/*618* 0010: dst is 32-bit aligned, src is 16-bit aligned619*/620ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */621ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */622mov r1, r3, lsr #16 /* r1 = ..54 */623orr r2, r2, r3, lsl #16 /* r2 = 3210 */624str r2, [r0]625strh r1, [r0, #0x04]626bx lr627LMEMCPY_6_PAD628629/*630* 0011: dst is 32-bit aligned, src is 8-bit aligned631*/632ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */633ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */634ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */635mov r2, r2, lsr #24 /* r2 = ...0 */636orr r2, r2, r3, lsl #8 /* r2 = 3210 */637mov r1, r1, lsl #8 /* r1 = xx5. 
*/638orr r1, r1, r3, lsr #24 /* r1 = xx54 */639str r2, [r0]640strh r1, [r0, #0x04]641bx lr642LMEMCPY_6_PAD643644/*645* 0100: dst is 8-bit aligned, src is 32-bit aligned646*/647ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */648ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */649mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */650strh r1, [r0, #0x01]651strb r3, [r0]652mov r3, r3, lsr #24 /* r3 = ...3 */653orr r3, r3, r2, lsl #8 /* r3 = .543 */654mov r2, r2, lsr #8 /* r2 = ...5 */655strh r3, [r0, #0x03]656strb r2, [r0, #0x05]657bx lr658LMEMCPY_6_PAD659660/*661* 0101: dst is 8-bit aligned, src is 8-bit aligned662*/663ldrb r2, [r1]664ldrh r3, [r1, #0x01]665ldrh ip, [r1, #0x03]666ldrb r1, [r1, #0x05]667strb r2, [r0]668strh r3, [r0, #0x01]669strh ip, [r0, #0x03]670strb r1, [r0, #0x05]671bx lr672LMEMCPY_6_PAD673674/*675* 0110: dst is 8-bit aligned, src is 16-bit aligned676*/677ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */678ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */679strb r2, [r0]680mov r3, r1, lsr #24681strb r3, [r0, #0x05]682mov r3, r1, lsr #8 /* r3 = .543 */683strh r3, [r0, #0x03]684mov r3, r2, lsr #8 /* r3 = ...1 */685orr r3, r3, r1, lsl #8 /* r3 = 4321 */686strh r3, [r0, #0x01]687bx lr688LMEMCPY_6_PAD689690/*691* 0111: dst is 8-bit aligned, src is 8-bit aligned692*/693ldrb r2, [r1]694ldrh r3, [r1, #0x01]695ldrh ip, [r1, #0x03]696ldrb r1, [r1, #0x05]697strb r2, [r0]698strh r3, [r0, #0x01]699strh ip, [r0, #0x03]700strb r1, [r0, #0x05]701bx lr702LMEMCPY_6_PAD703704/*705* 1000: dst is 16-bit aligned, src is 32-bit aligned706*/707ldrh r2, [r1, #0x04] /* r2 = ..54 */708ldr r3, [r1] /* r3 = 3210 */709mov r2, r2, lsl #16 /* r2 = 54.. 
*/710orr r2, r2, r3, lsr #16 /* r2 = 5432 */711strh r3, [r0]712str r2, [r0, #0x02]713bx lr714LMEMCPY_6_PAD715716/*717* 1001: dst is 16-bit aligned, src is 8-bit aligned718*/719ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */720ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */721mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */722mov r2, r2, lsl #8 /* r2 = 543. */723orr r2, r2, r3, lsr #24 /* r2 = 5432 */724strh r1, [r0]725str r2, [r0, #0x02]726bx lr727LMEMCPY_6_PAD728729/*730* 1010: dst is 16-bit aligned, src is 16-bit aligned731*/732ldrh r2, [r1]733ldr r3, [r1, #0x02]734strh r2, [r0]735str r3, [r0, #0x02]736bx lr737LMEMCPY_6_PAD738739/*740* 1011: dst is 16-bit aligned, src is 8-bit aligned741*/742ldrb r3, [r1] /* r3 = ...0 */743ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */744ldrb r1, [r1, #0x05] /* r1 = ...5 */745orr r3, r3, r2, lsl #8 /* r3 = 3210 */746mov r1, r1, lsl #24 /* r1 = 5... */747orr r1, r1, r2, lsr #8 /* r1 = 5432 */748strh r3, [r0]749str r1, [r0, #0x02]750bx lr751LMEMCPY_6_PAD752753/*754* 1100: dst is 8-bit aligned, src is 32-bit aligned755*/756ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */757ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */758strb r2, [r0]759mov r2, r2, lsr #8 /* r2 = .321 */760orr r2, r2, r1, lsl #24 /* r2 = 4321 */761mov r1, r1, lsr #8 /* r1 = ...5 */762str r2, [r0, #0x01]763strb r1, [r0, #0x05]764bx lr765LMEMCPY_6_PAD766767/*768* 1101: dst is 8-bit aligned, src is 8-bit aligned769*/770ldrb r2, [r1]771ldrh r3, [r1, #0x01]772ldrh ip, [r1, #0x03]773ldrb r1, [r1, #0x05]774strb r2, [r0]775strh r3, [r0, #0x01]776strh ip, [r0, #0x03]777strb r1, [r0, #0x05]778bx lr779LMEMCPY_6_PAD780781/*782* 1110: dst is 8-bit aligned, src is 16-bit aligned783*/784ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */785ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */786strb r2, [r0]787mov r2, r2, lsr #8 /* r2 = ...1 */788orr r2, r2, r1, lsl #8 /* r2 = 4321 */789mov r1, r1, lsr #24 /* r1 = ...5 */790str r2, [r0, #0x01]791strb r1, [r0, #0x05]792bx 
lr793LMEMCPY_6_PAD794795/*796* 1111: dst is 8-bit aligned, src is 8-bit aligned797*/798ldrb r2, [r1]799ldr r3, [r1, #0x01]800ldrb r1, [r1, #0x05]801strb r2, [r0]802str r3, [r0, #0x01]803strb r1, [r0, #0x05]804bx lr805LMEMCPY_6_PAD806807808/******************************************************************************809* Special case for 8 byte copies810*/811#define LMEMCPY_8_LOG2 6 /* 64 bytes */812#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2813LMEMCPY_8_PAD814.Lmemcpy_8:815and r2, r1, #0x03816orr r2, r2, r0, lsl #2817ands r2, r2, #0x0f818sub r3, pc, #0x14819addne pc, r3, r2, lsl #LMEMCPY_8_LOG2820821/*822* 0000: dst is 32-bit aligned, src is 32-bit aligned823*/824ldr r2, [r1]825ldr r3, [r1, #0x04]826str r2, [r0]827str r3, [r0, #0x04]828bx lr829LMEMCPY_8_PAD830831/*832* 0001: dst is 32-bit aligned, src is 8-bit aligned833*/834ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */835ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */836ldrb r1, [r1, #0x07] /* r1 = ...7 */837mov r3, r3, lsr #8 /* r3 = .210 */838orr r3, r3, r2, lsl #24 /* r3 = 3210 */839mov r1, r1, lsl #24 /* r1 = 7... 
*/840orr r2, r1, r2, lsr #8 /* r2 = 7654 */841str r3, [r0]842str r2, [r0, #0x04]843bx lr844LMEMCPY_8_PAD845846/*847* 0010: dst is 32-bit aligned, src is 16-bit aligned848*/849ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */850ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */851ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */852orr r2, r2, r3, lsl #16 /* r2 = 3210 */853mov r3, r3, lsr #16 /* r3 = ..54 */854orr r3, r3, r1, lsl #16 /* r3 = 7654 */855str r2, [r0]856str r3, [r0, #0x04]857bx lr858LMEMCPY_8_PAD859860/*861* 0011: dst is 32-bit aligned, src is 8-bit aligned862*/863ldrb r3, [r1] /* r3 = ...0 */864ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */865ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */866orr r3, r3, r2, lsl #8 /* r3 = 3210 */867mov r2, r2, lsr #24 /* r2 = ...4 */868orr r2, r2, r1, lsl #8 /* r2 = 7654 */869str r3, [r0]870str r2, [r0, #0x04]871bx lr872LMEMCPY_8_PAD873874/*875* 0100: dst is 8-bit aligned, src is 32-bit aligned876*/877ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */878ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */879strb r3, [r0]880mov r1, r2, lsr #24 /* r1 = ...7 */881strb r1, [r0, #0x07]882mov r1, r3, lsr #8 /* r1 = .321 */883mov r3, r3, lsr #24 /* r3 = ...3 */884orr r3, r3, r2, lsl #8 /* r3 = 6543 */885strh r1, [r0, #0x01]886str r3, [r0, #0x03]887bx lr888LMEMCPY_8_PAD889890/*891* 0101: dst is 8-bit aligned, src is 8-bit aligned892*/893ldrb r2, [r1]894ldrh r3, [r1, #0x01]895ldr ip, [r1, #0x03]896ldrb r1, [r1, #0x07]897strb r2, [r0]898strh r3, [r0, #0x01]899str ip, [r0, #0x03]900strb r1, [r0, #0x07]901bx lr902LMEMCPY_8_PAD903904/*905* 0110: dst is 8-bit aligned, src is 16-bit aligned906*/907ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */908ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */909ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */910strb r2, [r0] /* 0 */911mov ip, r1, lsr #8 /* ip = ...7 */912strb ip, [r0, #0x07] /* 7 */913mov ip, r2, lsr #8 /* ip = ...1 */914orr ip, ip, r3, lsl #8 /* ip = 4321 */915mov r3, r3, 
lsr #8 /* r3 = .543 */916orr r3, r3, r1, lsl #24 /* r3 = 6543 */917strh ip, [r0, #0x01]918str r3, [r0, #0x03]919bx lr920LMEMCPY_8_PAD921922/*923* 0111: dst is 8-bit aligned, src is 8-bit aligned924*/925ldrb r3, [r1] /* r3 = ...0 */926ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */927ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */928ldrb r1, [r1, #0x07] /* r1 = ...7 */929strb r3, [r0]930mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */931strh ip, [r0, #0x01]932orr r2, r3, r2, lsl #16 /* r2 = 6543 */933str r2, [r0, #0x03]934strb r1, [r0, #0x07]935bx lr936LMEMCPY_8_PAD937938/*939* 1000: dst is 16-bit aligned, src is 32-bit aligned940*/941ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */942ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */943mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */944strh r2, [r0]945orr r2, r1, r3, lsl #16 /* r2 = 5432 */946mov r3, r3, lsr #16 /* r3 = ..76 */947str r2, [r0, #0x02]948strh r3, [r0, #0x06]949bx lr950LMEMCPY_8_PAD951952/*953* 1001: dst is 16-bit aligned, src is 8-bit aligned954*/955ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */956ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */957ldrb ip, [r1, #0x07] /* ip = ...7 */958mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */959strh r1, [r0]960mov r1, r2, lsr #24 /* r1 = ...2 */961orr r1, r1, r3, lsl #8 /* r1 = 5432 */962mov r3, r3, lsr #24 /* r3 = ...6 */963orr r3, r3, ip, lsl #8 /* r3 = ..76 */964str r1, [r0, #0x02]965strh r3, [r0, #0x06]966bx lr967LMEMCPY_8_PAD968969/*970* 1010: dst is 16-bit aligned, src is 16-bit aligned971*/972ldrh r2, [r1]973ldr ip, [r1, #0x02]974ldrh r3, [r1, #0x06]975strh r2, [r0]976str ip, [r0, #0x02]977strh r3, [r0, #0x06]978bx lr979LMEMCPY_8_PAD980981/*982* 1011: dst is 16-bit aligned, src is 8-bit aligned983*/984ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */985ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */986ldrb ip, [r1] /* ip = ...0 */987mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */988strh r1, [r0, #0x06]989mov r3, r3, lsl #24 
/* r3 = 5... */990orr r3, r3, r2, lsr #8 /* r3 = 5432 */991orr r2, ip, r2, lsl #8 /* r2 = 3210 */992str r3, [r0, #0x02]993strh r2, [r0]994bx lr995LMEMCPY_8_PAD996997/*998* 1100: dst is 8-bit aligned, src is 32-bit aligned999*/1000ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */1001ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */1002mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */1003strh r1, [r0, #0x05]1004strb r2, [r0]1005mov r1, r3, lsr #24 /* r1 = ...7 */1006strb r1, [r0, #0x07]1007mov r2, r2, lsr #8 /* r2 = .321 */1008orr r2, r2, r3, lsl #24 /* r2 = 4321 */1009str r2, [r0, #0x01]1010bx lr1011LMEMCPY_8_PAD10121013/*1014* 1101: dst is 8-bit aligned, src is 8-bit aligned1015*/1016ldrb r3, [r1] /* r3 = ...0 */1017ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */1018ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */1019ldrb r1, [r1, #0x07] /* r1 = ...7 */1020strb r3, [r0]1021mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */1022strh r3, [r0, #0x05]1023orr r2, r2, ip, lsl #16 /* r2 = 4321 */1024str r2, [r0, #0x01]1025strb r1, [r0, #0x07]1026bx lr1027LMEMCPY_8_PAD10281029/*1030* 1110: dst is 8-bit aligned, src is 16-bit aligned1031*/1032ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */1033ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */1034ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */1035strb r2, [r0]1036mov ip, r2, lsr #8 /* ip = ...1 */1037orr ip, ip, r3, lsl #8 /* ip = 4321 */1038mov r2, r1, lsr #8 /* r2 = ...7 */1039strb r2, [r0, #0x07]1040mov r1, r1, lsl #8 /* r1 = .76. 
*/1041orr r1, r1, r3, lsr #24 /* r1 = .765 */1042str ip, [r0, #0x01]1043strh r1, [r0, #0x05]1044bx lr1045LMEMCPY_8_PAD10461047/*1048* 1111: dst is 8-bit aligned, src is 8-bit aligned1049*/1050ldrb r2, [r1]1051ldr ip, [r1, #0x01]1052ldrh r3, [r1, #0x05]1053ldrb r1, [r1, #0x07]1054strb r2, [r0]1055str ip, [r0, #0x01]1056strh r3, [r0, #0x05]1057strb r1, [r0, #0x07]1058bx lr1059LMEMCPY_8_PAD10601061/******************************************************************************1062* Special case for 12 byte copies1063*/1064#define LMEMCPY_C_LOG2 7 /* 128 bytes */1065#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG21066LMEMCPY_C_PAD1067.Lmemcpy_c:1068and r2, r1, #0x031069orr r2, r2, r0, lsl #21070ands r2, r2, #0x0f1071sub r3, pc, #0x141072addne pc, r3, r2, lsl #LMEMCPY_C_LOG210731074/*1075* 0000: dst is 32-bit aligned, src is 32-bit aligned1076*/1077ldr r2, [r1]1078ldr r3, [r1, #0x04]1079ldr r1, [r1, #0x08]1080str r2, [r0]1081str r3, [r0, #0x04]1082str r1, [r0, #0x08]1083bx lr1084LMEMCPY_C_PAD10851086/*1087* 0001: dst is 32-bit aligned, src is 8-bit aligned1088*/1089ldrb r2, [r1, #0xb] /* r2 = ...B */1090ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */1091ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */1092ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */1093mov r2, r2, lsl #24 /* r2 = B... */1094orr r2, r2, ip, lsr #8 /* r2 = BA98 */1095str r2, [r0, #0x08]1096mov r2, ip, lsl #24 /* r2 = 7... 
*/1097orr r2, r2, r3, lsr #8 /* r2 = 7654 */1098mov r1, r1, lsr #8 /* r1 = .210 */1099orr r1, r1, r3, lsl #24 /* r1 = 3210 */1100str r2, [r0, #0x04]1101str r1, [r0]1102bx lr1103LMEMCPY_C_PAD11041105/*1106* 0010: dst is 32-bit aligned, src is 16-bit aligned1107*/1108ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */1109ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */1110ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */1111ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */1112orr r2, r2, r3, lsl #16 /* r2 = 3210 */1113str r2, [r0]1114mov r3, r3, lsr #16 /* r3 = ..54 */1115orr r3, r3, ip, lsl #16 /* r3 = 7654 */1116mov r1, r1, lsl #16 /* r1 = BA.. */1117orr r1, r1, ip, lsr #16 /* r1 = BA98 */1118str r3, [r0, #0x04]1119str r1, [r0, #0x08]1120bx lr1121LMEMCPY_C_PAD11221123/*1124* 0011: dst is 32-bit aligned, src is 8-bit aligned1125*/1126ldrb r2, [r1] /* r2 = ...0 */1127ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */1128ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */1129ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */1130orr r2, r2, r3, lsl #8 /* r2 = 3210 */1131str r2, [r0]1132mov r3, r3, lsr #24 /* r3 = ...4 */1133orr r3, r3, ip, lsl #8 /* r3 = 7654 */1134mov r1, r1, lsl #8 /* r1 = BA9. 
*/1135orr r1, r1, ip, lsr #24 /* r1 = BA98 */1136str r3, [r0, #0x04]1137str r1, [r0, #0x08]1138bx lr1139LMEMCPY_C_PAD11401141/*1142* 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned1143*/1144ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */1145ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */1146ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */1147mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */1148strh r1, [r0, #0x01]1149strb r2, [r0]1150mov r1, r2, lsr #24 /* r1 = ...3 */1151orr r2, r1, r3, lsl #8 /* r1 = 6543 */1152mov r1, r3, lsr #24 /* r1 = ...7 */1153orr r1, r1, ip, lsl #8 /* r1 = A987 */1154mov ip, ip, lsr #24 /* ip = ...B */1155str r2, [r0, #0x03]1156str r1, [r0, #0x07]1157strb ip, [r0, #0x0b]1158bx lr1159LMEMCPY_C_PAD11601161/*1162* 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)1163*/1164ldrb r2, [r1]1165ldrh r3, [r1, #0x01]1166ldr ip, [r1, #0x03]1167strb r2, [r0]1168ldr r2, [r1, #0x07]1169ldrb r1, [r1, #0x0b]1170strh r3, [r0, #0x01]1171str ip, [r0, #0x03]1172str r2, [r0, #0x07]1173strb r1, [r0, #0x0b]1174bx lr1175LMEMCPY_C_PAD11761177/*1178* 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned1179*/1180ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */1181ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */1182ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */1183ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */1184strb r2, [r0]1185mov r2, r2, lsr #8 /* r2 = ...1 */1186orr r2, r2, r3, lsl #8 /* r2 = 4321 */1187strh r2, [r0, #0x01]1188mov r2, r3, lsr #8 /* r2 = .543 */1189orr r3, r2, ip, lsl #24 /* r3 = 6543 */1190mov r2, ip, lsr #8 /* r2 = .987 */1191orr r2, r2, r1, lsl #24 /* r2 = A987 */1192mov r1, r1, lsr #8 /* r1 = ...B */1193str r3, [r0, #0x03]1194str r2, [r0, #0x07]1195strb r1, [r0, #0x0b]1196bx lr1197LMEMCPY_C_PAD11981199/*1200* 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)1201*/1202ldrb r2, [r1]1203ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */1204ldr ip, [r1, #0x05] /* BE:ip = 5678 
LE:ip = 8765 */1205ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */1206strb r2, [r0]1207strh r3, [r0, #0x01]1208mov r3, r3, lsr #16 /* r3 = ..43 */1209orr r3, r3, ip, lsl #16 /* r3 = 6543 */1210mov ip, ip, lsr #16 /* ip = ..87 */1211orr ip, ip, r1, lsl #16 /* ip = A987 */1212mov r1, r1, lsr #16 /* r1 = ..xB */1213str r3, [r0, #0x03]1214str ip, [r0, #0x07]1215strb r1, [r0, #0x0b]1216bx lr1217LMEMCPY_C_PAD12181219/*1220* 1000: dst is 16-bit aligned, src is 32-bit aligned1221*/1222ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */1223ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */1224ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */1225mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */1226strh ip, [r0]1227orr r1, r1, r3, lsl #16 /* r1 = 5432 */1228mov r3, r3, lsr #16 /* r3 = ..76 */1229orr r3, r3, r2, lsl #16 /* r3 = 9876 */1230mov r2, r2, lsr #16 /* r2 = ..BA */1231str r1, [r0, #0x02]1232str r3, [r0, #0x06]1233strh r2, [r0, #0x0a]1234bx lr1235LMEMCPY_C_PAD12361237/*1238* 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)1239*/1240ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */1241ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */1242mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */1243strh ip, [r0]1244ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */1245ldrb r1, [r1, #0x0b] /* r1 = ...B */1246mov r2, r2, lsr #24 /* r2 = ...2 */1247orr r2, r2, r3, lsl #8 /* r2 = 5432 */1248mov r3, r3, lsr #24 /* r3 = ...6 */1249orr r3, r3, ip, lsl #8 /* r3 = 9876 */1250mov r1, r1, lsl #8 /* r1 = ..B. 
*/1251orr r1, r1, ip, lsr #24 /* r1 = ..BA */1252str r2, [r0, #0x02]1253str r3, [r0, #0x06]1254strh r1, [r0, #0x0a]1255bx lr1256LMEMCPY_C_PAD12571258/*1259* 1010: dst is 16-bit aligned, src is 16-bit aligned1260*/1261ldrh r2, [r1]1262ldr r3, [r1, #0x02]1263ldr ip, [r1, #0x06]1264ldrh r1, [r1, #0x0a]1265strh r2, [r0]1266str r3, [r0, #0x02]1267str ip, [r0, #0x06]1268strh r1, [r0, #0x0a]1269bx lr1270LMEMCPY_C_PAD12711272/*1273* 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)1274*/1275ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */1276ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */1277mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */1278strh ip, [r0, #0x0a]1279ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */1280ldrb r1, [r1] /* r1 = ...0 */1281mov r2, r2, lsl #24 /* r2 = 9... */1282orr r2, r2, r3, lsr #8 /* r2 = 9876 */1283mov r3, r3, lsl #24 /* r3 = 5... */1284orr r3, r3, ip, lsr #8 /* r3 = 5432 */1285orr r1, r1, ip, lsl #8 /* r1 = 3210 */1286str r2, [r0, #0x06]1287str r3, [r0, #0x02]1288strh r1, [r0]1289bx lr1290LMEMCPY_C_PAD12911292/*1293* 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned1294*/1295ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */1296ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */1297ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */1298strb r2, [r0]1299mov r3, r2, lsr #8 /* r3 = .321 */1300orr r3, r3, ip, lsl #24 /* r3 = 4321 */1301str r3, [r0, #0x01]1302mov r3, ip, lsr #8 /* r3 = .765 */1303orr r3, r3, r1, lsl #24 /* r3 = 8765 */1304str r3, [r0, #0x05]1305mov r1, r1, lsr #8 /* r1 = .BA9 */1306strh r1, [r0, #0x09]1307mov r1, r1, lsr #16 /* r1 = ...B */1308strb r1, [r0, #0x0b]1309bx lr1310LMEMCPY_C_PAD13111312/*1313* 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)1314*/1315ldrb r2, [r1, #0x0b] /* r2 = ...B */1316ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */1317ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */1318ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */1319strb r2, [r0, #0x0b]1320mov 
r2, r3, lsr #16 /* r2 = ..A9 */1321strh r2, [r0, #0x09]1322mov r3, r3, lsl #16 /* r3 = 87.. */1323orr r3, r3, ip, lsr #16 /* r3 = 8765 */1324mov ip, ip, lsl #16 /* ip = 43.. */1325orr ip, ip, r1, lsr #16 /* ip = 4321 */1326mov r1, r1, lsr #8 /* r1 = .210 */1327str r3, [r0, #0x05]1328str ip, [r0, #0x01]1329strb r1, [r0]1330bx lr1331LMEMCPY_C_PAD13321333/*1334* 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned1335*/1336ldrh r2, [r1] /* r2 = ..10 */1337ldr r3, [r1, #0x02] /* r3 = 5432 */1338ldr ip, [r1, #0x06] /* ip = 9876 */1339ldrh r1, [r1, #0x0a] /* r1 = ..BA */1340strb r2, [r0]1341mov r2, r2, lsr #8 /* r2 = ...1 */1342orr r2, r2, r3, lsl #8 /* r2 = 4321 */1343mov r3, r3, lsr #24 /* r3 = ...5 */1344orr r3, r3, ip, lsl #8 /* r3 = 8765 */1345mov ip, ip, lsr #24 /* ip = ...9 */1346orr ip, ip, r1, lsl #8 /* ip = .BA9 */1347mov r1, r1, lsr #8 /* r1 = ...B */1348str r2, [r0, #0x01]1349str r3, [r0, #0x05]1350strh ip, [r0, #0x09]1351strb r1, [r0, #0x0b]1352bx lr1353LMEMCPY_C_PAD13541355/*1356* 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)1357*/1358ldrb r2, [r1]1359ldr r3, [r1, #0x01]1360ldr ip, [r1, #0x05]1361strb r2, [r0]1362ldrh r2, [r1, #0x09]1363ldrb r1, [r1, #0x0b]1364str r3, [r0, #0x01]1365str ip, [r0, #0x05]1366strh r2, [r0, #0x09]1367strb r1, [r0, #0x0b]1368bx lr1369#endif /* !_STANDALONE */1370END(memcpy)13711372.section .note.GNU-stack,"",%progbits137313741375