Path: blob/master/arch/arm/lib/csumpartialcopygeneric.S
10817 views
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Copies len bytes from src to dst while accumulating a checksum of
 * the copied data into sum, using add-with-carry (adcs) throughout.
 *
 * Note that 'tst' and 'teq' preserve the carry flag.  The code relies
 * on this, and on using 'sub' (not 'subs') for counters, so that the
 * carry produced by one 'adcs' reaches the next one even when tests
 * and counter updates sit in between.
 *
 * Scratch registers used below: r4-r8 and ip.
 *
 * This file does not define FN_ENTRY, FN_EXIT, save_regs, load_regs,
 * load1b, load2b, load1l, load2l, load4l, put_byte_0, put_byte_1,
 * get_byte_0, get_byte_1, get_byte_2, push or pull.  They must be
 * supplied by whatever assembles this file.
 * NOTE(review): every remark below about what those macros do is
 * inferred from how they are used here - confirm against their
 * definitions.
 */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

		/*
		 * len == 0: nothing to copy, hand back the incoming sum.
		 * load_regs presumably restores what save_regs saved and
		 * returns to the caller (no other return follows it).
		 */
.Lzero:		mov	r0, sum
		load_regs

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * Reached with 'blne' from the main entry, so lr holds the
		 * return address; returns with dst 32-bit aligned, len
		 * reduced by the 1, 2 or 3 bytes copied, and C holding the
		 * carry out of the last adcs.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit

		load1b	ip				@ dst is odd: copy one byte
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		moveq	pc, lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip				@ dst is 2 mod 4: copy two bytes
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 * (we arrive through 'blo' after 'cmp len, #8', and 'blo'
		 * is only taken with carry clear).
		 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6			@ any byte pairs left?
		beq	.Lless8_byteonly

		/* Copy two bytes per pass while len >= 2 (len < 8 here). */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1			@ one trailing byte?
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

		/*
		 * Function entry point.  FN_ENTRY presumably emits the
		 * function's label; save_regs presumably saves the scratch
		 * registers and leaves the original dst at [sp, #0], which
		 * .Ldone reads back.
		 */
FN_ENTRY
		save_regs

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	2f

		/* 16 bytes per pass; sub/teq keep C intact for the adcs chain. */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* Remaining whole words: bit 3 of len = 8 bytes, bit 2 = 4 bytes. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Trailing 1 to 3 bytes: fetch one more word with load1l
		 * and store only the bytes that are wanted.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit			@ exactly one byte left
		adcs	sum, sum, r4, push #16	@ checksum the first two bytes
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2	@ candidate final byte
		/*
		 * Shared tail: if len is odd, r5 holds the final byte in
		 * its low 8 bits; store it and add it to the checksum.
		 */
.Lexit:		tst	len, #1
		strneb	r5, [dst], #1
		andne	r5, r5, #255
		adcnes	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
.Ldone:		adc	r0, sum, #0		@ fold in the last carry
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

		/*
		 * dst is 32-bit aligned but src is not.  src is rounded
		 * down to a word boundary and read a word at a time; r4
		 * carries the bytes left over from the previous word and
		 * is merged with the next one using complementary
		 * pull/push shifts.  ip = src & 3 selects the variant:
		 * 1 falls through, 2 -> .Lsrc2_aligned, 3 -> .Lsrc3_aligned.
		 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned

		/* src offset 1: r4 carries three leftover bytes. */
		mov	r4, r5, pull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		mov	r7, r7, pull #8
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #8
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #8
		/* Tail: up to three bytes, all taken from r4 (no further load). */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

		/* src offset 2: r4 carries two leftover bytes. */
.Lsrc2_aligned:	mov	r4, r5, pull #16
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		mov	r7, r7, pull #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #16
		/*
		 * Tail: r4 supplies the first two bytes; a third byte, if
		 * needed, is fetched with load1b and finished in .Lexit.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

		/* src offset 3: r4 carries one leftover byte. */
.Lsrc3_aligned:	mov	r4, r5, pull #24
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		mov	r7, r7, pull #24
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #24
		/*
		 * Tail: r4 supplies only the first byte; for two or three
		 * bytes another word is loaded to provide the rest.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, push #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT