/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <[email protected]>
 *		Arnt Gulbrandsen, <[email protected]>
 *		Tom May, <[email protected]>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <[email protected]>
 *		Finn Arne Gangstad <[email protected]>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 */
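/*
 * For orientation, a rough C model of the value the routines below
 * compute; this is an illustrative sketch only (the helper name and the
 * big-endian word order are assumptions), not the kernel's generic C
 * implementation.  The bit pattern it returns may differ from the
 * assembler's by an equivalent ones'-complement fold, which callers
 * that fold the result to 16 bits will never notice.
 *
 *	static unsigned int csum_partial_model(const unsigned char *buff,
 *					       int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {		// whole 16-bit words
 *			acc += (unsigned int)(buff[0] << 8 | buff[1]);
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)			// odd trailing byte
 *			acc += (unsigned int)(buff[0] << 8);
 *		while (acc >> 32)		// end-around carry
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */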

#ifdef CONFIG_ISA_DUAL_ISSUE

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2		    ||	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0		    ||	addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0		    ||	addi	r1, #-1
	ldi	r3, #0		    ||	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0		    ||	addi	r1, #-2
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	bra	4f		    ||	addi	r1, #2
	.fillinsn			; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0		    ||	ldi	r3, #0
	addx	r2, r4		    ||	addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+				; +4
	ld	r5, @r0+				; +8
	ld	r3, @r0+	    ||	addx	r2, r3	; +12
	ld	r4, @r0+	    ||	addx	r2, r4	; +16
	ld	r5, @r0+	    ||	addx	r2, r5	; +20
	ld	r3, @r0+	    ||	addx	r2, r3	; +24
	ld	r4, @r0+	    ||	addx	r2, r4	; +28
	addx	r2, r5		    ||	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; withdraw len
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+	    ||	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)
	lduh	r4, @r0		    ||	addi	r0, #2
	addi	r1, #-2		    ||	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0		    ||	ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2		    ||	cmp	r0, r0
	addx	r0, r2		    ||	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
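	; From here the flow mirrors the dual-issue variant above, one
	; instruction at a time: sum 32-byte blocks in an unrolled loop
	; (len >> 5 iterations), then the remaining whole words
	; (len & 0x1c bytes), then the tail bytes (len & 3), and finally
	; fold the 32-bit accumulator to 16 bits and re-add the caller's
	; sum argument.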
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; withdraw len
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2
	cmp	r0, r0
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction;
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

ENTRY(csum_partial_copy_generic)
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop

	.end
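/*
 * For reference, a hedged C sketch of what csum_partial_copy_generic is
 * meant to do once implemented: copy len bytes from src to dst and
 * checksum the data as it goes by, like csum_partial.  The helper name
 * below is illustrative only, and the sketch deliberately omits what the
 * real routine exists for: performing the accesses through
 * exception-table-protected loads and stores and reporting a faulting
 * access through *src_err_ptr / *dst_err_ptr (typically as -EFAULT).
 *
 *	static unsigned int csum_copy_model(const char *src, char *dst,
 *					    int len, unsigned int sum)
 *	{
 *		int i;
 *
 *		for (i = 0; i < len; i++)	// no fault handling here
 *			dst[i] = src[i];
 *		return csum_partial((const unsigned char *)dst, len, sum);
 *	}
 */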