/*
 * arch/score/lib/csum_partial.S
 *
 * Score Processor version.
 *
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 * Lennox Wu <[email protected]>
 * Chen Liqin <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <linux/linkage.h>

/*
 * Register usage in this file (as established by the code below):
 *
 *   r4  (src)  in:  current read pointer into the buffer
 *              out: folded checksum (copied from sum before "br r3")
 *   r5         in:  byte count; decremented as alignment chunks are consumed
 *   r6         in:  caller-supplied partial checksum, added in after folding
 *   r3              return target ("br r3") - presumably the link register
 *   r27 (sum)       32-bit running accumulator
 *   r25             1 if the buffer started on an odd address, else 0;
 *                   selects the final byte swap in the fold code
 *   r26             scratch: loop counters and fold temporary
 *   r8 - r11        scratch: loaded data words
 *   r10             also carries the remaining byte count into small_csumcpy
 *
 * Presumably this implements the kernel's
 *   __wsum csum_partial(const void *buff, int len, __wsum sum)
 * with arguments arriving in r4/r5/r6 - confirm against the Score ABI.
 */

/*
 * ADDC(sum, reg): sum += reg with end-around carry.
 * After the add, an unsigned "reg > sum" means the addition wrapped, so one
 * more is added.  Clobbers the condition flags.  Uses numeric local label 9,
 * so callers must not rely on a "9:" label of their own across an ADDC.
 */
#define ADDC(sum,reg)			\
	add	sum, sum, reg;		\
	cmp.c	reg, sum;		\
	bleu	9f;			\
	addi	sum, 0x1;		\
9:

/*
 * CSUM_BIGCHUNK(src, offset, sum): accumulate the 32 bytes (eight words) at
 * src + offset into sum.  Clobbers r8-r11 and the condition flags; src itself
 * is not advanced.
 */
#define CSUM_BIGCHUNK(src, offset, sum)		\
	lw	r8, [src, offset + 0x00];	\
	lw	r9, [src, offset + 0x04];	\
	lw	r10, [src, offset + 0x08];	\
	lw	r11, [src, offset + 0x0c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);				\
	lw	r8, [src, offset + 0x10];	\
	lw	r9, [src, offset + 0x14];	\
	lw	r10, [src, offset + 0x18];	\
	lw	r11, [src, offset + 0x1c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);

#define src	r4
/*
 * NOTE(review): "dest" is never referenced in this file; r5 holds the byte
 * count here, not a destination pointer.
 */
#define dest	r5
#define sum	r27

	.text

/*
 * small_csumcpy: tail handler, also used directly for short buffers.
 *
 * In:  r10 = bytes left (fewer than 8 when reached from csum_partial),
 *      src = read pointer, sum = accumulator, r25 = odd-start flag,
 *      r6  = caller's partial checksum.
 * Falls through to the fold code, which returns via r3 with the result in r4.
 */
/* unknown src alignment and < 8 bytes to go */
small_csumcpy:
	mv	r5, r10
	ldi	r9, 0x0
	cmpi.c	r25, 0x1
	/*
	 * If r25 is already 1 the leading odd byte was consumed earlier; the
	 * "equal" flag from this compare is what makes the beq at
	 * pass_small_set_t7 go straight to "aligned".
	 */
	beq	pass_small_set_t7	/* already set, jump to pass_small_set_t7 */
	andri.c	r25, r4, 0x1		/* Is src 2 bytes aligned? */

pass_small_set_t7:
	beq	aligned
	cmpi.c	r5, 0x0
	beq	fold
	lbu	r9, [src]
	slli	r9, r9, 0x8		/* Little endian */
	ADDC(sum, r9)
	addi	src, 0x1
	subi.c	r5, 0x1

	/* len still a full word */
aligned:
	andri.c	r8, r5, 0x4		/* Len >= 4? */
	beq	len_less_4bytes

	/* Still a full word (4 bytes) to go, and the src is halfword aligned. */
	andri.c	r8, src, 0x3		/* src is 4-byte aligned, so use LW!! */
	beq	four_byte_aligned
	/* Only 2-byte aligned: read the word as two halfwords. */
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	b	len_less_4bytes

four_byte_aligned:			/* Len >= 4 and four byte aligned */
	lw	r9, [src]
	addi	src, 4
	ADDC(sum, r9)

len_less_4bytes:			/* 2 byte aligned and length < 4B */
	andri.c	r8, r5, 0x2
	beq	len_less_2bytes
	lhu	r9, [src]
	addi	src, 0x2		/* src += 2 */
	ADDC(sum, r9)

len_less_2bytes:			/* len = 1 */
	andri.c	r8, r5, 0x1
	beq	fold			/* less than 2 and not equal 1 --> len=0 -> fold */
	lbu	r9, [src]

fold_ADDC:
	ADDC(sum, r9)
fold:
	/*
	 * fold checksum: adding (sum << 16) to sum leaves low16 + high16 in
	 * the upper halfword; the wrap of that add is the end-around carry.
	 * The bleu below consumes the flags set by cmp.c, so this relies on
	 * the plain (non ".c") srli in between not updating them.
	 */
	slli	r26, sum, 16
	add	sum, sum, r26
	cmp.c	r26, sum
	srli	sum, sum, 16
	bleu	1f			/* if r26 <= sum */
	addi	sum, 0x1		/* r26 > sum */
1:
	/* odd buffer alignment? r25 was set in csum_partial */
	cmpi.c	r25, 0x0
	beq	1f
	/* Odd start: swap the two bytes of the folded 16-bit sum. */
	slli	r26, sum, 8
	srli	sum, sum, 8
	or	sum, sum, r26
	andi	sum, 0xffff
1:
	.set	optimize
	/* Add the passed partial csum. */
	ADDC(sum, r6)
	mv	r4, sum
	br	r3
	.set	volatile

/*
 * csum_partial: accumulate a ones'-complement style sum over a buffer.
 *
 * In:  r4 = buffer address, r5 = byte count, r6 = partial checksum to add in.
 * Out: r4 = folded sum with r6 added (with end-around carry).
 *
 * Flow: lengths below 8 go straight to small_csumcpy.  Otherwise the pointer
 * is aligned step by step (1, 2, then - for lengths of at least 56 - 4, 8 and
 * 16 bytes), the bulk is summed in 128/64/32-byte chunks, the remaining whole
 * words are summed one at a time, and the last 0-3 bytes are handed to
 * small_csumcpy, which also folds the result and returns.
 */
	.align	5
ENTRY(csum_partial)
	ldi	sum, 0
	ldi	r25, 0
	mv	r10, r5
	cmpi.c	r5, 0x8
	blt	small_csumcpy		/* < 8 (signed) bytes to copy */
	/*
	 * NOTE(review): r5 >= 8 here, so this zero check can never branch;
	 * zero-length buffers are handled by small_csumcpy above.  "out"
	 * would also return sum without adding r6.
	 */
	cmpi.c	r5, 0x0
	beq	out
	andri.c	r25, src, 0x1		/* odd buffer? */

	beq	word_align
hword_align:				/* 1 byte */
	lbu	r8, [src]
	subi	r5, 0x1
	slli	r8, r8, 8
	ADDC(sum, r8)
	addi	src, 0x1

word_align:				/* 2 bytes */
	andri.c	r8, src, 0x2		/* 4bytes(dword)_aligned? */
	beq	dword_align		/* not, maybe dword_align */
	lhu	r8, [src]
	subi	r5, 0x2
	ADDC(sum, r8)
	addi	src, 0x2

dword_align:				/* 4 bytes */
	mv	r26, r5			/* maybe useless when len >= 56 */
	ldi	r8, 56
	cmp.c	r8, r5
	/* Short buffer: skip the chunked path; r26 = len feeds do_end_words. */
	bgtu	do_end_words		/* if a1(len) < t0(56), unsigned */
	andri.c	r26, src, 0x4
	beq	qword_align
	lw	r8, [src]
	subi	r5, 0x4
	ADDC(sum, r8)
	addi	src, 0x4

qword_align:				/* 8 bytes */
	andri.c	r26, src, 0x8
	beq	oword_align
	lw	r8, [src, 0x0]
	lw	r9, [src, 0x4]
	subi	r5, 0x8			/* len -= 0x8 */
	ADDC(sum, r8)
	ADDC(sum, r9)
	addi	src, 0x8

oword_align:				/* 16 bytes */
	andri.c	r26, src, 0x10
	beq	begin_movement
	lw	r10, [src, 0x08]
	lw	r11, [src, 0x0c]
	lw	r8, [src, 0x00]
	lw	r9, [src, 0x04]
	ADDC(sum, r10)
	ADDC(sum, r11)
	ADDC(sum, r8)
	ADDC(sum, r9)
	subi	r5, 0x10
	addi	src, 0x10

begin_movement:
	srli.c	r26, r5, 0x7		/* len >= 128? */
	beq	1f			/* len < 128 */

	/* r26 is the result that computed in oword_align */
	/*
	 * r5 is not decremented from here on; the later stages only test
	 * individual bits of it (0x40, 0x20, 0x1c, 0x3).
	 */
move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	CSUM_BIGCHUNK(src, 0x40, sum)
	CSUM_BIGCHUNK(src, 0x60, sum)
	subi.c	r26, 0x01		/* r26 equals len/128 */
	addi	src, 0x80
	bne	move_128bytes

1:	/* len < 128, we process 64 bytes here */
	andri.c	r10, r5, 0x40
	beq	1f

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	addi	src, 0x40

1:	/* len < 64 */
	andri	r26, r5, 0x1c		/* 0x1c = 28 */
	andri.c	r10, r5, 0x20
	beq	do_end_words		/* decided by andri */

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	andri	r26, r5, 0x1c
	addri	src, src, 0x20

do_end_words:				/* len < 32 */
	/* r26 was set already in dword_align */
	cmpi.c	r26, 0x0
	beq	maybe_end_cruft		/* len < 28 or len < 56 */
	srli	r26, r26, 0x2		/* bytes -> number of whole words */

end_words:
	lw	r8, [src]
	subi.c	r26, 0x1		/* unit is 4 byte */
	ADDC(sum, r8)
	addi	src, 0x4
	/* ADDC clobbered the flags from subi.c, so test the counter again. */
	cmpi.c	r26, 0x0
	bne	end_words		/* r26 != 0 */

maybe_end_cruft:			/* len < 4 */
	andri	r10, r5, 0x3

small_memcpy:
	mv	r5, r10
	j	small_csumcpy

out:
	mv	r4, sum
	br	r3

END(csum_partial)