/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas ([email protected])
 *
 * Severely hacked about by Paul Mackerras ([email protected]).
 */

#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 *
 * Register use, as read from the code below:
 *   r3 = buff on entry (biased by -4 so every access uses a +4
 *        displacement and lwzu can advance it); result on return
 *   r4 = len (bytes)
 *   r5 = sum on entry, running 32-bit accumulator afterwards
 *   r0, r6, r7, r8 and CTR are used as scratch
 *
 * The accumulator is updated with adde, so the carry out of each
 * addition is fed into the next one; the final addze folds the last
 * carry into the result.
 * NOTE(review): this relies on none of the instructions placed between
 * the adde's modifying the carry bit - confirm against the ISA before
 * inserting anything into the chain.
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f		/* tests CR0 from the srwi. above */
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6		/* (# words mod 4) are summed one at a time */
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	/*
	 * Software-pipelined loop over 16-byte blocks: the first block is
	 * loaded here, and the add of its last word (r8) is deferred to the
	 * next iteration, or to label 23 once CTR reaches zero.
	 */
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f		/* only one block: skip the loop body */
22:	lwz	r0,4(r3)
	adde	r5,r5,r8	/* word left pending by the previous block */
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8	/* last pending word */
3:	andi.	r0,r4,2		/* trailing halfword? */
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1		/* trailing byte? */
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
 *
 * csum_partial_copy_generic(src, dst, len)
 *
 * Register use, as read from the code below:
 *   r3  = src on entry; scratch afterwards; result on return
 *   r4  = dst on entry; then src - 4 (load pointer)
 *   r5  = len, reduced as the head and the full cachelines are consumed
 *   r6  = dst - 4 (store pointer)
 *   r12 = running 32-bit accumulator, starts at 0xffffffff
 *   cr7.eq = set when the result needs no final byte rotation
 *   r0, r7 - r11 and CTR are used as scratch
 *
 * Every load and store that touches src or dst carries a numeric label
 * that has a matching EX_TABLE entry pointing at "fault".
 */

/*
 * Copy and checksum 16 bytes: four word loads from r4, four word stores
 * to r6, each word added into r12 with adde.  Every access gets a label
 * of the form 8<n><0-7> so CSUM_COPY_16_BYTES_EXCODE(n) can refer to it.
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

/*
 * Exception table entries for the eight accesses emitted by
 * CSUM_COPY_16_BYTES_WITHEX(n); all of them resume at "fault".
 */
#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, fault);	\
	EX_TABLE(8 ## n ## 1b, fault);	\
	EX_TABLE(8 ## n ## 2b, fault);	\
	EX_TABLE(8 ## n ## 3b, fault);	\
	EX_TABLE(8 ## n ## 4b, fault);	\
	EX_TABLE(8 ## n ## 5b, fault);	\
	EX_TABLE(8 ## n ## 6b, fault);	\
	EX_TABLE(8 ## n ## 7b, fault);

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	li	r12,-1		/* accumulator starts at 0xffffffff */
	addic	r0,r0,0		/* clear carry */
	addi	r6,r4,-4	/* r6 = dst - 4 */
	neg	r0,r4
	addi	r4,r3,-4	/* r4 = src - 4 */
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq	/* default: no rotation of the result at exit */
	beq	58f		/* dst is already cacheline aligned */

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8	/* r7 = 8 if the low address bit of r6 is set, else 0 */
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even ? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0		/* r3 collects the head bytes, first byte highest */
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31	/* insert the new byte into the low 8 bits of r3 */
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5	/* len -= bytes up to the cacheline boundary */
	srwi.	r0,r0,2		/* # whole words up to the cacheline boundary */
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES	/* r5 = bytes left after them */
	li	r11,4		/* offset for dcbz: r6 + 4 is the current dst */
	beq	63f		/* tests CR0 from the srwi. above */

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4		/* dcbt offset from r4; r4 + 4 is the current src */
	cmpwi	r0,1
	li	r7,0		/* r7 = # cachelines prefetched ahead */
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

	/*
	 * First pass: copy (r0 - r7) cachelines while prefetching ahead, and
	 * keep r7 in r0.  If r0 is then non-zero, the code after the loop
	 * comes back here with r7 = 0 and r3 = 4 to copy those r0 lines too.
	 */
114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6		/* zero the dst cacheline that is filled below */
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0		/* any cachelines left for a second pass? */
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2		/* # remaining whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2		/* trailing halfword? */
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1		/* trailing byte? */
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8		/* same placement as the last byte in __csum_partial */
	adde	r12,r12,r0
66:	addze	r3,r12		/* add in final carry */
	beqlr+	cr7		/* return now unless cr7.eq was cleared above */
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

/* Target of every EX_TABLE entry in this function: return 0 */
fault:
	li	r3,0
	blr

	EX_TABLE(70b, fault);
	EX_TABLE(71b, fault);
	EX_TABLE(72b, fault);
	EX_TABLE(73b, fault);
	EX_TABLE(54b, fault);

/*
 * Exception table entries for the cacheline loop: a fault on any of its
 * loads or stores resumes at fault.
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, fault);
	EX_TABLE(31b, fault);
	EX_TABLE(40b, fault);
	EX_TABLE(41b, fault);
	EX_TABLE(50b, fault);
	EX_TABLE(51b, fault);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 *
 * Register use, as read from the code below:
 *   r3 = saddr, r4 = daddr (four words are loaded from each)
 *   r5 = len, r6 = proto, r7 = sum
 *   r0 = running 32-bit accumulator; r8 - r11 hold the loaded words
 *
 * The eight address words, sum and (len + proto) are added with an
 * addc/adde carry chain, the last carry is folded in with addze, and the
 * 32-bit total is then folded to 16 bits and complemented.
 */

_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)
	lwz	r9, 4(r3)
	addc	r0, r7, r8	/* addc starts the carry chain */
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0		/* add in final carry */
	rotlwi	r3, r0, 16	/* r3 = r0 with its halfwords swapped */
	add	r3, r0, r3	/* upper halfword = sum of both halves, carry wrapped in */
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31	/* move the upper halfword down, clear the rest */
	blr
EXPORT_SYMBOL(csum_ipv6_magic)

