/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\
	.set	pop

#define ADDC32(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1;					\
	.set	pop
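
/*
 * ADDC/ADDC32 above perform an end-around-carry add: any carry out of
 * the top bit is folded back into the low bit, which is what keeps the
 * running value a valid ones'-complement partial sum.  A rough C sketch
 * of the same operation (illustrative only; addc() is a hypothetical
 * helper, not used by this file; shown for 32-bit unsigned arithmetic):
 *
 *	static inline unsigned int addc(unsigned int sum, unsigned int v)
 *	{
 *		sum += v;
 *		sum += (sum < v);	// carry out?  fold it back in
 *		return sum;
 *	}
 */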

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(_t0, _t1);						\
	ADDC(_t2, _t3);						\
	ADDC(sum, _t0);						\
	ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0

	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
	move	t2, a1

	andi	t7, src, 0x1			/* odd buffer? */

.Lhword_align:
	beqz	t7, .Lword_align
	andi	t8, src, 0x2

	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

.Lword_align:
	beqz	t8, .Ldword_align
	sltiu	t8, a1, 56

	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

.Ldword_align:
	bnez	t8, .Ldo_end_words
	move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, .Lqword_align
	andi	t8, src, 0x8

	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

.Lqword_align:
	beqz	t8, .Loword_align
	andi	t8, src, 0x10

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

.Loword_align:
	beqz	t8, .Lbegin_movement
	LONG_SRL	t8, a1, 0x7

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

.Lbegin_movement:
	beqz	t8, 1f
	andi	t2, a1, 0x40

.Lmove_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, .Lmove_128bytes
	.set	noreorder

1:
	beqz	t2, 1f
	andi	t2, a1, 0x20

.Lmove_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, .Ldo_end_words
	andi	t8, a1, 0x1c

.Lmove_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

.Ldo_end_words:
	beqz	t8, .Lsmall_csumcpy
	andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

.Lend_words:
	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x4
	bnez	t8, .Lend_words
	.set	noreorder

/* unknown src alignment and < 8 bytes to go  */
.Lsmall_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	andi	t0, a1, 2

	/* Still a full word to go  */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
#ifdef USE_DOUBLE
	dsll	t1, t1, 32			/* clear lower 32bit */
#endif
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	andi	t0, a1, 1

	/* Still a halfword to go  */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	sll	t1, t1, 16

	lbu	t2, (src)
	nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

	/* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, t7
	.set	pop
#else
	beqz	t7, 1f			/* odd buffer alignment? */
	lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	reorder
	/* Add the passed partial csum. */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)
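
/*
 * The "fold checksum" / "odd buffer alignment?" epilogue above (the copy
 * variants below end with the same sequence) reduces the wide accumulator
 * to a 32-bit partial sum and undoes the byte-lane swap caused by a buffer
 * that started at an odd address (csum_partial tests the source address,
 * the copy variants test the destination).  A rough C sketch, illustrative
 * only; fold64() is a hypothetical helper and the real code additionally
 * folds in the caller's partial sum via ADDC32:
 *
 *	unsigned int fold64(unsigned long long sum, int odd)
 *	{
 *		unsigned int lo = sum, hi = sum >> 32;
 *
 *		lo += hi;			// 64 -> 32 bits ...
 *		lo += (lo < hi);		// ... with end-around carry
 *		if (odd)			// lanes accumulated one byte
 *			lo = ((lo & 0x00ff00ff) << 8) |	// off, so swap within
 *			     ((lo >> 8) & 0x00ff00ff);	// each halfword
 *		return lo;
 *	}
 */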

/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len)
 *	__csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 *	insn    : Load/store instruction
 *	type    : Instruction type
 *	reg     : Register
 *	addr    : Address
 *	handler : Exception handler
 */
#define EXC(insn, type, reg, addr)			\
	.if \mode == LEGACY_MODE;			\
9:		insn reg, addr;				\
		.section __ex_table,"a";		\
		PTR_WD	9b, .L_exc;			\
		.previous;				\
	/* This is enabled in EVA mode */		\
	.else;						\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) ||	\
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";	\
			PTR_WD	9b, .L_exc;		\
			.previous;			\
		.else;					\
			/* EVA without exception */	\
			insn reg, addr;			\
		.endif;					\
	.endif
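
/*
 * For example, in legacy (non-EVA) mode EXC(lw, LD_INSN, t0, 0(src))
 * expands to roughly:
 *
 *	9:	lw	t0, 0(src)
 *		.section __ex_table,"a"
 *		PTR_WD	9b, .L_exc
 *		.previous
 *
 * i.e. the access itself plus an exception-table entry, so a fault on
 * that access is handled by .L_exc (which returns 0) rather than
 * becoming a kernel fault.
 */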

#undef LOAD

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr)		EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sd, ST_INSN, reg, addr)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr)		EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sw, ST_INSN, reg, addr)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

	li	sum, -1
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, .Lcopy_bytes_checklen\@
	and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, .Ldst_unaligned\@
	nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
	nop
	SUB	len, 8*NBYTES		# subtract here for bgez loop
	.align	4
1:
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	LOAD(t4, UNIT(4)(src))
	LOAD(t5, UNIT(5)(src))
	LOAD(t6, UNIT(6)(src))
	LOAD(t7, UNIT(7)(src))
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst))
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst))
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst))
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst))
	ADDC(sum, t6)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES		# revert len (see above)
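
/*
 * In spirit, the unrolled loop above is the following C (illustrative
 * only; load_unit()/store_unit() are hypothetical stand-ins for the
 * LOAD/STORE macros and addc() for ADDC).  The real code interleaves
 * the carry folds between the stores and sums register pairs first to
 * expose more instruction-level parallelism:
 *
 *	for (; len >= 8 * NBYTES; len -= 8 * NBYTES,
 *	     src += 8 * NBYTES, dst += 8 * NBYTES) {
 *		for (int i = 0; i < 8; i++) {
 *			unsigned long u = load_unit(src, i);
 *			store_unit(dst, i, u);
 *			sum = addc(sum, u);
 *		}
 *	}
 */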

	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	LOAD(t0, 0(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src))
	SUB	bits, bits, rem # bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1))
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set	noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src))
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src))
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst))
	SLL	t4, t1, 3	# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
	/*
	 * Avoid consecutive LD*'s to the same register since some mips
	 * implementations can't issue them in the same cycle.
	 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
	 * are to the same unit (unless src is aligned, but it's not).
	 */
	LDFIRST(t0, FIRST(0)(src))
	LDFIRST(t1, FIRST(1)(src))
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src))
	LDREST(t1, REST(1)(src))
	LDFIRST(t2, FIRST(2)(src))
	LDFIRST(t3, FIRST(3)(src))
	LDREST(t2, REST(2)(src))
	LDREST(t3, REST(3)(src))
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	LDFIRST(t0, FIRST(0)(src))
	LDREST(t0, REST(0)(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES  */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero	# partial word
	li	t3, SHIFT_START # shift
#define COPY_BYTE(N)			\
	LOADBU(t0, N(src));		\
	SUB	len, len, 1;		\
	STOREB(t0, N(dst));		\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, .Lcopy_bytes_done\@; \
	or	t2, t0
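
/*
 * COPY_BYTE copies one byte and also merges it into the partial word in
 * t2 at the position it would occupy in a unit-sized load (SHIFT_START/
 * SHIFT_INC pick the byte order per endianness), so the tail bytes get
 * checksummed as if they had been loaded as one unit with the missing
 * bytes zeroed.  Rough C sketch of the accumulation (illustrative only,
 * little-endian case, using hypothetical variable names):
 *
 *	partial |= (unsigned long)byte << shift;
 *	shift += 8;
 */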

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADBU(t0, NBYTES-2(src))
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst))
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* fold checksum */
	.set	push
	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, odd
	.set	pop
#else
	beqz	odd, 1f			/* odd buffer alignment? */
	lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	pop
	.set	reorder
	jr	ra
	.set	noreorder
	.endm

	.set	noreorder
.L_exc:
	jr	ra
	li	v0, 0

FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif