/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <[email protected]>
 *		Arnt Gulbrandsen, <[email protected]>
 *		Tom May, <[email protected]>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <[email protected]>
 *		Finn Arne Gangstad <[email protected]>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 *
 * Register usage in the code below:
 *   In:      r4 = buf, r5 = len, r6 = sum (32-bit running sum)
 *   Out:     r0 = updated 32-bit sum (not folded to 16 bits)
 *   Scratch: r0-r3, r7 (r7 keeps the original buf for the final
 *            odd-alignment check), T bit.  r4, r5, r6 are modified.
 *
 * Loop idiom used throughout this file: "dt" overwrites the T bit, so
 * the carry from the last addc is parked in r0 with "movt r0" and put
 * back into T by "cmp/eq #1, r0" in the branch delay slot.
 *
 * Instructions indented by one extra space sit in a branch delay slot
 * and execute whether or not the branch is taken.
 */

.text
ENTRY(csum_partial)
	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5
	add	#-1, r5
	bt	9f		! len was 0: nothing to sum
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	! Big-endian: the first byte of a 16-bit word is its high byte.
	! The sum is rotated by 8 bits twice (here and at label 9), so the
	! byte must already be in its final position when it is added.
	shll8	r0
#endif
	addc	r0, r6		! t=0 from previous tst
	movt	r0		! r0 = carry out of the addc
	add	r0, r6		! end-around carry (tst below would lose T)
	mov	r6, r0		! rotate r6 left by 8 bits:
	shll8	r6		!   r6 = sum << 8
	shlr16	r0
	shlr8	r0		!   r0 = sum >> 24
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f		! now 4-byte aligned
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! fold the carry back in
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if it is zero, go to 4f
	 clrt
	.align	2
3:
	! Sum 32 bytes (eight longwords) per iteration, loads interleaved
	! with the add-with-carry chain.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry, dt overwrites T
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0		! restore carry into T
	! here, we know r1==0
	addc	r1, r6			! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! r0 = bytes left in whole longwords (0..28)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = number of longwords
	mov	#0, r2
5:
	addc	r2, r6		! adds the longword loaded last iteration
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6		! add the final longword
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it is zero, go to 9f
	mov	#2, r1
	cmp/hs  r1, r5
	bf	7f		! exactly one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left
	 clrt
	shll16	r0		! three bytes left: add the word, then the byte
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold the final carry
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0		! rotate r6 left by 8 bits again
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
					int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * Each SRC()/DST() use emits the wrapped instruction at local label 9999
 * and records an __ex_table entry pairing it with the fixup handler at
 * 6001 (source access) or 6002 (destination access).
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
! The original DST and LEN are pushed on entry so the source-fault fixup
! can zero the whole destination; they are dropped again before rts.
! r2 holds the remaining length while r6 is reused as a loop counter.
! Result is returned in r0.
!
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Assemble the two bytes into one 16-bit word in memory order.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save carry, dt overwrites T
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0		! restore carry into T
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f		! even length: done
	bra	5f		! odd length: copy the last byte
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5 = number of 32-byte chunks
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! r6 = bytes left in whole longwords
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = number of longwords
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! r6 = trailing bytes (0..3)
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! exactly one byte left
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left
	 clrt
	shll16	r0
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:

! Exception handler:
.section .fixup, "ax"

6001:
	! Fault while reading the source.
	mov.l	@(8,r15),r0			! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5		! dst
	mov.l	@r15,r6			! len
	mov	#0,r7			! returned sum becomes 0
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	 nop
	.align	2
8000:	.long	5000b

6002:
	! Fault while writing the destination.
	mov.l	@(12,r15),r0			! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long 	5000b

.previous
	add	#8,r15		! drop the saved dst and len
	rts
	 mov	r7,r0